summaryrefslogtreecommitdiffstats
path: root/COPYING-LGPLV3
blob: 65c5ca88a67c30becee01c5a8816d964b03862f9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
                   GNU LESSER GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.


  This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.

  0. Additional Definitions.

  As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.

  "The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.

  An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.

  A "Combined Work" is a work produced by combining or linking an
Application with the Library.  The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".

  The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.

  The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.

  1. Exception to Section 3 of the GNU GPL.

  You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.

  2. Conveying Modified Versions.

  If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:

   a) under this License, provided that you make a good faith effort to
   ensure that, in the event an Application does not supply the
   function or data, the facility still operates, and performs
   whatever part of its purpose remains meaningful, or

   b) under the GNU GPL, with none of the additional permissions of
   this License applicable to that copy.

  3. Object Code Incorporating Material from Library Header Files.

  The object code form of an Application may incorporate material from
a header file that is part of the Library.  You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:

   a) Give prominent notice with each copy of the object code that the
   Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the object code with a copy of the GNU GPL and this license
   document.

  4. Combined Works.

  You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:

   a) Give prominent notice with each copy of the Combined Work that
   the Library is used in it and that the Library and its use are
   covered by this License.

   b) Accompany the Combined Work with a copy of the GNU GPL and this license
   document.

   c) For a Combined Work that displays copyright notices during
   execution, include the copyright notice for the Library among
   these notices, as well as a reference directing the user to the
   copies of the GNU GPL and this license document.

   d) Do one of the following:

       0) Convey the Minimal Corresponding Source under the terms of this
       License, and the Corresponding Application Code in a form
       suitable for, and under terms that permit, the user to
       recombine or relink the Application with a modified version of
       the Linked Version to produce a modified Combined Work, in the
       manner specified by section 6 of the GNU GPL for conveying
       Corresponding Source.

       1) Use a suitable shared library mechanism for linking with the
       Library.  A suitable mechanism is one that (a) uses at run time
       a copy of the Library already present on the user's computer
       system, and (b) will operate properly with a modified version
       of the Library that is interface-compatible with the Linked
       Version.

   e) Provide Installation Information, but only if you would otherwise
   be required to provide such information under section 6 of the
   GNU GPL, and only to the extent that such information is
   necessary to install and execute a modified version of the
   Combined Work produced by recombining or relinking the
   Application with a modified version of the Linked Version. (If
   you use option 4d0, the Installation Information must accompany
   the Minimal Corresponding Source and Corresponding Application
   Code. If you use option 4d1, you must provide the Installation
   Information in the manner specified by section 6 of the GNU GPL
   for conveying Corresponding Source.)

  5. Combined Libraries.

  You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:

   a) Accompany the combined library with a copy of the same work based
   on the Library, uncombined with any other library facilities,
   conveyed under the terms of this License.

   b) Give prominent notice with the combined library that part of it
   is a work based on the Library, and explaining where to find the
   accompanying uncombined form of the same work.

  6. Revised Versions of the GNU Lesser General Public License.

  The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.

  Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.

  If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.
> -rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1787
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h65
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c2555
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h53
-rw-r--r--xlators/cluster/afr/src/afr.c3095
-rw-r--r--xlators/cluster/afr/src/afr.h1432
-rw-r--r--xlators/cluster/afr/src/pump.c2641
-rw-r--r--xlators/cluster/afr/src/pump.h78
-rw-r--r--xlators/cluster/dht/src/Makefile.am32
-rw-r--r--xlators/cluster/dht/src/dht-common.c6421
-rw-r--r--xlators/cluster/dht/src/dht-common.h857
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c503
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c147
-rw-r--r--xlators/cluster/dht/src/dht-helper.c1151
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1139
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1013
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1041
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c428
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h35
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1815
-rw-r--r--xlators/cluster/dht/src/dht-rename.c1128
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c1239
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c307
-rw-r--r--xlators/cluster/dht/src/nufa.c1000
-rw-r--r--xlators/cluster/dht/src/switch.c904
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c43
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h26
-rw-r--r--xlators/cluster/ha/src/ha.c1268
-rw-r--r--xlators/cluster/ha/src/ha.h30
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c29
-rw-r--r--xlators/cluster/map/src/map-mem-types.h24
-rw-r--r--xlators/cluster/map/src/map.c541
-rw-r--r--xlators/cluster/map/src/map.h25
-rw-r--r--xlators/cluster/stripe/src/Makefile.am14
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h31
-rw-r--r--xlators/cluster/stripe/src/stripe.c6098
-rw-r--r--xlators/cluster/stripe/src/stripe.h212
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1225
-rw-r--r--xlators/cluster/unify/src/unify.c4506
-rw-r--r--xlators/cluster/unify/src/unify.h132
-rw-r--r--xlators/debug/error-gen/src/Makefile.am9
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2954
-rw-r--r--xlators/debug/error-gen/src/error-gen.h48
-rw-r--r--xlators/debug/io-stats/src/Makefile.am9
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h24
-rw-r--r--xlators/debug/io-stats/src/io-stats.c3335
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c5398
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from xlators/cluster/unify/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c94
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am5
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am9
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c530
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c1414
-rw-r--r--xlators/features/locks/src/common.h179
-rw-r--r--xlators/features/locks/src/entrylk.c848
-rw-r--r--xlators/features/locks/src/inodelk.c825
-rw-r--r--xlators/features/locks/src/internal.c896
-rw-r--r--xlators/features/locks/src/locks-mem-types.h29
-rw-r--r--xlators/features/locks/src/locks.h195
-rw-r--r--xlators/features/locks/src/posix.c3042
-rw-r--r--xlators/features/locks/src/reservelk.c443
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/Makefile.am (renamed from xlators/storage/bdb/Makefile.am)0
-rw-r--r--xlators/features/mac-compat/src/Makefile.am14
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c237
-rw-r--r--xlators/features/marker/Makefile.am3
-rw-r--r--xlators/features/marker/src/Makefile.am17
-rw-r--r--xlators/features/marker/src/marker-common.c69
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h25
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c414
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h76
-rw-r--r--xlators/features/marker/src/marker-quota.c2520
-rw-r--r--xlators/features/marker/src/marker-quota.h130
-rw-r--r--xlators/features/marker/src/marker.c2862
-rw-r--r--xlators/features/marker/src/marker.h138
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h22
-rw-r--r--xlators/features/path-convertor/src/path.c301
-rw-r--r--xlators/features/protect/Makefile.am3
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/Makefile.am3
-rw-r--r--xlators/features/quiesce/src/Makefile.am15
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c2610
-rw-r--r--xlators/features/quiesce/src/quiesce.h51
-rw-r--r--xlators/features/quota/src/Makefile.am12
-rw-r--r--xlators/features/quota/src/quota-mem-types.h27
-rw-r--r--xlators/features/quota/src/quota.c3906
-rw-r--r--xlators/features/quota/src/quota.h151
-rw-r--r--xlators/features/read-only/Makefile.am3
-rw-r--r--xlators/features/read-only/src/Makefile.am22
-rw-r--r--xlators/features/read-only/src/read-only-common.c239
-rw-r--r--xlators/features/read-only/src/read-only-common.h115
-rw-r--r--xlators/features/read-only/src/read-only.c77
-rw-r--r--xlators/features/read-only/src/worm.c89
-rw-r--r--xlators/features/trash/src/Makefile.am9
-rw-r--r--xlators/features/trash/src/trash-mem-types.h22
-rw-r--r--xlators/features/trash/src/trash.c1971
-rw-r--r--xlators/features/trash/src/trash.h79
-rw-r--r--xlators/lib/src/libxlator.c470
-rw-r--r--xlators/lib/src/libxlator.h153
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h25
-rw-r--r--xlators/meta/src/meta.c88
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c43
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am50
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1953
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c4236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c4237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1366
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c693
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c6243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h299
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c492
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c749
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2024
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1974
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c1109
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c3564
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h164
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c8894
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h634
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c4065
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h176
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2225
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1597
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h952
-rw-r--r--xlators/mount/fuse/src/Makefile.am34
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c6369
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h537
-rw-r--r--xlators/mount/fuse/src/fuse-extra.c155
-rw-r--r--xlators/mount/fuse/src/fuse-extra.h42
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c605
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h28
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c724
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in508
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in28
-rw-r--r--xlators/nfs/Makefile.am3
-rw-r--r--xlators/nfs/server/Makefile.am3
-rw-r--r--xlators/nfs/server/src/Makefile.am24
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c2759
-rw-r--r--xlators/nfs/server/src/mount3.h131
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c465
-rw-r--r--xlators/nfs/server/src/nfs-common.h81
-rw-r--r--xlators/nfs/server/src/nfs-fops.c1661
-rw-r--r--xlators/nfs/server/src/nfs-fops.h248
-rw-r--r--xlators/nfs/server/src/nfs-generics.c345
-rw-r--r--xlators/nfs/server/src/nfs-generics.h168
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c608
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h76
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h53
-rw-r--r--xlators/nfs/server/src/nfs.c1825
-rw-r--r--xlators/nfs/server/src/nfs.h135
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c188
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h103
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c3881
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h337
-rw-r--r--xlators/nfs/server/src/nfs3.c5630
-rw-r--r--xlators/nfs/server/src/nfs3.h285
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am10
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2609
-rw-r--r--xlators/performance/io-cache/src/io-cache.h345
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c299
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h29
-rw-r--r--xlators/performance/io-cache/src/page.c1333
-rw-r--r--xlators/performance/io-threads/src/Makefile.am9
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2712
-rw-r--r--xlators/performance/io-threads/src/io-threads.h178
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h22
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/Makefile.am3
-rw-r--r--xlators/performance/quick-read/src/Makefile.am15
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h27
-rw-r--r--xlators/performance/quick-read/src/quick-read.c1147
-rw-r--r--xlators/performance/quick-read/src/quick-read.h81
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/read-ahead/src/page.c696
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h26
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1614
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h148
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am11
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c508
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h32
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c78
-rw-r--r--xlators/performance/write-behind/src/Makefile.am9
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h26
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3382
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/Makefile.am4
-rw-r--r--xlators/protocol/auth/Makefile.am1
-rw-r--r--xlators/protocol/auth/addr/Makefile.am1
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c229
-rw-r--r--xlators/protocol/auth/login/Makefile.am1
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am12
-rw-r--r--xlators/protocol/auth/login/src/login.c128
-rw-r--r--xlators/protocol/client/Makefile.am2
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c56
-rw-r--r--xlators/protocol/client/src/client-handshake.c1960
-rw-r--r--xlators/protocol/client/src/client-helpers.c349
-rw-r--r--xlators/protocol/client/src/client-lk.c573
-rw-r--r--xlators/protocol/client/src/client-mem-types.h25
-rw-r--r--xlators/protocol/client/src/client-protocol.c6306
-rw-r--r--xlators/protocol/client/src/client-protocol.h170
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6203
-rw-r--r--xlators/protocol/client/src/client.c2867
-rw-r--r--xlators/protocol/client/src/client.h257
-rw-r--r--xlators/protocol/client/src/saved-frames.c191
-rw-r--r--xlators/protocol/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/server/Makefile.am2
-rw-r--r--xlators/protocol/server/src/Makefile.am28
-rw-r--r--xlators/protocol/server/src/authenticate.c253
-rw-r--r--xlators/protocol/server/src/authenticate.h51
-rw-r--r--xlators/protocol/server/src/server-dentry.c413
-rw-r--r--xlators/protocol/server/src/server-handshake.c781
-rw-r--r--xlators/protocol/server/src/server-helpers.c1829
-rw-r--r--xlators/protocol/server/src/server-helpers.h101
-rw-r--r--xlators/protocol/server/src/server-mem-types.h30
-rw-r--r--xlators/protocol/server/src/server-protocol.c7847
-rw-r--r--xlators/protocol/server/src/server-protocol.h158
-rw-r--r--xlators/protocol/server/src/server-resolve.c598
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c1214
-rw-r--r--xlators/protocol/server/src/server.h196
-rw-r--r--xlators/storage/Makefile.am8
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1460
-rw-r--r--xlators/storage/bdb/src/bdb.c3624
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am18
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1391
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h27
-rw-r--r--xlators/storage/posix/src/posix.c5742
-rw-r--r--xlators/storage/posix/src/posix.h158
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c180
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h26
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2183
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h30
424 files changed, 244664 insertions, 67022 deletions
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 2abb52194..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 7f32d7f29..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2009 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -77,9 +67,6 @@ struct xlator_fops fops = {
.writev = python_writev
};
-struct xlator_mops mops = {
-};
-
static PyObject *
AnonModule_FromFile (const char* fname)
{
@@ -151,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -169,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -191,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -220,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index a6ddb3564..0990822a7 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = unify stripe afr dht ha map
+SUBDIRS = stripe afr dht
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 1bde9e5ba..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,20 +1,37 @@
-xlator_LTLIBRARIES = afr.la
+xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_la_LDFLAGS = -module -avoidversion
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c
+afr_la_LDFLAGS = -module -avoid-version
+afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h
+pump_la_LDFLAGS = -module -avoid-version
+pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
+ rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
- ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so \ No newline at end of file
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
new file mode 100644
index 000000000..af01f2ef2
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -0,0 +1,4591 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+#include "inode.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
+#include "pump.h"
+
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
+#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#define AFR_STATISTICS_HISTORY_SIZE 50
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ dst[i] = src[i];
+}
+
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ path, priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
+}
+
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc, void **gfid_req)
+{
+ int ret = -ENOMEM;
+
+ GF_ASSERT (gfid_req);
+
+ *gfid_req = NULL;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
+
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to get the gfid from dict", loc->path);
+ *gfid_req = NULL;
+ } else {
+ if (loc->parent != NULL)
+ dict_del (local->xattr_req, "gfid-req");
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+{
+ inode_t *inode = NULL;
+
+ inode = loc->inode;
+ if (inode && !uuid_is_null (inode->gfid))
+ uuid_copy (dst, inode->gfid);
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (dst, loc->gfid);
+ else if (new && !uuid_is_null (new))
+ uuid_copy (dst, new);
+}
+
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno)
+{
+ int i = 0;
+ int errno_count = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (child_errno[child] == op_errno)
+ errno_count++;
+ }
+ return errno_count;
+}
+
+int32_t
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+ int ret = 0;
+ uuid_t *pgfid = NULL;
+
+ GF_ASSERT (gfid);
+
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ if (ret && pgfid)
+ GF_FREE (pgfid);
+
+ return ret;
+}
+
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
+
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
+ }
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
+out:
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ }
+ UNLOCK (&inode->lock);
+ return ctx;
+}
+
+void
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
+ read_child = (int32_t)(ctx->masks &
+ AFR_ICTX_READ_CHILD_MASK);
+ params->u.read_ctx.read_child = read_child;
+ if (!fresh_children)
+ goto unlock;
+ for (i = 0; i < priv->child_count; i++)
+ fresh_children[i] = ctx->fresh_children[i];
+ break;
+ case AFR_INODE_GET_OPENDIR_DONE:
+ params->u.value = _gf_false;
+ if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
+ params->u.value = _gf_true;
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
+}
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.value;
+}
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.read_ctx.read_child;
+}
+
+void
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children)
+ ctx->fresh_children[i] = fresh_children[i];
+ else
+ ctx->fresh_children[i] = -1;
+ }
+}
+
+void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
+ int32_t child_count)
+{
+ int i = 0;
+ int32_t read_child = -1;
+
+ GF_ASSERT (stale_children);
+ for (i = 0; i < child_count; i++) {
+ if (stale_children[i] == -1)
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+ read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
+ if (!afr_is_child_present (ctx->fresh_children, child_count,
+ read_child))
+ afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+}
+
+void
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
+ read_child = params->u.read_ctx.read_child;
+ fresh_children = params->u.read_ctx.children;
+ afr_inode_ctx_set_read_ctx (ctx, read_child,
+ fresh_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_RM_STALE_CHILDREN:
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
+ afr_inode_ctx_set_opendir_done (ctx);
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
+}
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+
+ GF_ASSERT (stale_children);
+
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
+ params.u.read_ctx.children = stale_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+gf_boolean_t
+afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+{
+ gf_boolean_t source_xattrs = _gf_false;
+
+ GF_ASSERT (child < child_count);
+
+ if ((child >= 0) && (child < child_count) &&
+ sources[child]) {
+ source_xattrs = _gf_true;
+ }
+ return source_xattrs;
+}
+
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ int i = 0;
+
+ GF_ASSERT (child < child_count);
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (child == success_children[i]) {
+ success_child = _gf_true;
+ break;
+ }
+ }
+ return success_child;
+}
+
+gf_boolean_t
+afr_is_read_child (int32_t *success_children, int32_t *sources,
+ int32_t child_count, int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ gf_boolean_t source = _gf_false;
+
+ if (child < 0) {
+ return _gf_false;
+ }
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (child_count > 0);
+
+ success_child = afr_is_child_present (success_children, child_count,
+ child);
+ if (!success_child)
+ goto out;
+ if (NULL == sources) {
+ source = _gf_true;
+ goto out;
+ }
+ source = afr_is_source_child (sources, child_count, child);
+out:
+ return (success_child && source);
+}
+
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients won't converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
+
+/* If sources is NULL the xattrs are assumed to be of source for all
+ * success_children.
+ */
+int
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
+{
+ int32_t read_child = -1;
+ int i = 0;
+
+ GF_ASSERT (success_children);
+
+ read_child = config_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = prev_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ read_child = success_children[i];
+ if (read_child < 0)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+ }
+ read_child = -1;
+
+out:
+ return read_child;
+}
+
+/* This function should be used when all the success_children are sources
+ */
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid)
+{
+ int read_child = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ read_child = afr_select_read_child_from_policy (fresh_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ NULL,
+ priv->hash_mode, gfid);
+ if (read_child >= 0)
+ afr_inode_set_read_ctx (this, inode, read_child,
+ fresh_children);
+}
+
+/* afr_next_call_child ()
+ * This is a common function used by all the read-type fops
+ * This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child)
+{
+ int next_index = 0;
+ int32_t next_call_child = -1;
+
+ GF_ASSERT (last_index);
+
+ next_index = *last_index;
+retry:
+ next_index++;
+ if ((next_index >= child_count) ||
+ (fresh_children[next_index] == -1))
+ goto out;
+ if ((fresh_children[next_index] == read_child) ||
+ (!child_up[fresh_children[next_index]]))
+ goto retry;
+ *last_index = next_index;
+ next_call_child = fresh_children[next_index];
+out:
+ return next_call_child;
+}
+
+ /* This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+ GF_ASSERT (call_child);
+ GF_ASSERT (last_index);
+ GF_ASSERT (fresh_children);
+
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ priv = this->private;
+ *call_child = -1;
+ *last_index = -1;
+
+ if (child_up[read_child]) {
+ *call_child = read_child;
+ } else {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ if (child_up[fresh_children[i]]) {
+ *call_child = fresh_children[i];
+ ret = 0;
+ break;
+ }
+ }
+
+ if (*call_child == -1) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ *last_index = i;
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
+ "last_index: %d", ret, *call_child, *last_index);
+ return ret;
+}
+
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+{
+ unsigned int i = 0;
+
+ if (!xattr)
+ goto out;
+ for (i = 0; i < child_count; i++) {
+ if (xattr[i]) {
+ dict_unref (xattr[i]);
+ xattr[i] = NULL;
+ }
+ }
+out:
+ return;
+}
+
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
+afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
+
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
+
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
+
+ if (sh->inode)
+ inode_unref (sh->inode);
+
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
+
+ GF_FREE (sh->child_errno);
+
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+
+ GF_FREE (sh->sources);
+
+ GF_FREE (sh->success);
+
+ GF_FREE (sh->locked_nodes);
+
+ if (sh->healing_fd) {
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+ }
+
+ GF_FREE ((char *)sh->linkname);
+
+ GF_FREE (sh->success_children);
+
+ GF_FREE (sh->fresh_children);
+
+ GF_FREE (sh->fresh_parent_dirs);
+
+ loc_wipe (&sh->parent_loc);
+ loc_wipe (&sh->lookup_loc);
+
+ GF_FREE (sh->checksum);
+
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
+}
+
+
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
+
+ GF_FREE (local->internal_lock.locked_nodes);
+
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
+
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+
+ afr_entry_lockee_cleanup (&local->internal_lock);
+
+ GF_FREE (local->transaction.pre_op);
+ GF_FREE (local->transaction.eager_lock);
+
+ GF_FREE (local->transaction.basename);
+ GF_FREE (local->transaction.new_basename);
+
+ loc_wipe (&local->transaction.parent_loc);
+ loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
+}
+
+
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+
+ if (!local)
+ return;
+
+ afr_local_sh_cleanup (local, this);
+
+ afr_local_transaction_cleanup (local, this);
+
+ priv = this->private;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
+
+ GF_FREE (local->child_up);
+
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->fresh_children);
+
+ { /* lookup */
+ if (local->cont.lookup.xattrs) {
+ afr_reset_xattr (local->cont.lookup.xattrs,
+ priv->child_count);
+ GF_FREE (local->cont.lookup.xattrs);
+ local->cont.lookup.xattrs = NULL;
+ }
+
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ }
+
+ if (local->cont.lookup.inode) {
+ inode_unref (local->cont.lookup.inode);
+ }
+
+ GF_FREE (local->cont.lookup.postparents);
+
+ GF_FREE (local->cont.lookup.bufs);
+
+ GF_FREE (local->cont.lookup.success_children);
+
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
+ }
+
+ { /* getxattr */
+ GF_FREE (local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE (local->cont.lk.locked_nodes);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref (local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref (local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref (local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref (local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref (local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE (local->cont.writev.vector);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref (local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE (local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE (local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
+ }
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+}
+
+
+int
+afr_frame_return (call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ return call_count;
+}
+
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (elems[i])
+ ret++;
+ return ret;
+}
+
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+{
+ return afr_set_elem_count_get (child_up, child_count);
+}
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
+}
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
+
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int32_t ret = 0;
+
+ GF_ASSERT (loc);
+ GF_ASSERT (this);
+ GF_ASSERT (loc->inode);
+
+ ret = inode_ctx_get (loc->inode, this, &ctx);
+ if (0 == ret)
+ return _gf_false;
+ return _gf_true;
+}
+
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
+}
+
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
+int
+afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
+{
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
+ dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int ret = 0;
+ int i = 0;
+
+ GF_ASSERT (local);
+
+ buf = &local->cont.lookup.buf;
+ postparent = &local->cont.lookup.postparent;
+ xattr = &local->cont.lookup.xattr;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
+ read_child);
+ if (!*xattr)
+ *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
+ *buf = local->cont.lookup.bufs[read_child];
+ *postparent = local->cont.lookup.postparents[read_child];
+
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
+ if (IA_INVAL == local->cont.lookup.inode->ia_type) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->ia_type = buf->ia_type;
+ }
+out:
+ return ret;
+}
+
+static void
+afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
+ int child_index, dict_t *xattr)
+{
+ uint32_t inodelk_count = 0;
+ uint32_t entrylk_count = 0;
+ int ret = -1;
+ uint32_t parent_entrylk = 0;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+ GF_ASSERT (child_index >= 0);
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
+ &inodelk_count);
+ if (ret == 0)
+ local->inodelk_count += inodelk_count;
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
+ &entrylk_count);
+ if (ret == 0)
+ local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
+}
+
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
+static void
+afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
+
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+ }
+}
+
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
+static void
+afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
+ struct iatt *buf, struct iatt *lookup_buf)
+{
+ if (PERMISSION_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "permissions differ for %s ", local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ownership differs for %s ", local->loc.path);
+ }
+
+ if (SIZE_DIFFERS (buf, lookup_buf)
+ && IA_ISREG (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "size differs for %s ", local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+
+ if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
+ /* mismatching gfid */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: gfid different on subvolume", local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
+{
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+
+ if ((local->success_count > 0) && (local->enoent_count > 0)) {
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
+ local->self_heal.do_entry_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entries are missing in lookup of %s.",
+ local->loc.path);
+ }
+
+ return;
+}
+
+gf_boolean_t
+afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (priv);
+
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ return (sh->do_gfid_self_heal
+ || sh->do_missing_entry_self_heal
+ || (afr_data_self_heal_enabled (priv->data_self_heal) &&
+ sh->do_data_self_heal)
+ || (priv->metadata_self_heal && sh->do_metadata_self_heal)
+ || (priv->entry_self_heal && sh->do_entry_self_heal));
+}
+
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type)
+{
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ if (IA_ISDIR (ia_type)) {
+ type = AFR_ENTRY_TRANSACTION;
+ } else if (IA_ISREG (ia_type)) {
+ type = AFR_DATA_TRANSACTION;
+ }
+ return type;
+}
+
+int
+afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
+ int32_t *read_child)
+{
+ ia_type_t ia_type = IA_INVAL;
+ int32_t source = -1;
+ int ret = -1;
+ dict_t **xattrs = NULL;
+ int32_t *success_children = NULL;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (local->success_count > 0);
+
+ success_children = local->cont.lookup.success_children;
+ /*We can take the success_children[0] only because we already
+ *handle the conflicting children other wise, we could select the
+ *read_child based on wrong file type
+ */
+ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
+ type = afr_transaction_type_get (ia_type);
+ xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
+ source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
+ type, *gfid);
+ if (source < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
+ "for %s", local->loc.path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
+ source, local->loc.path);
+ *read_child = source;
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline gf_boolean_t
+afr_is_transaction_running (afr_local_t *local)
+{
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
+ return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
+}
+
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
+{
+ afr_local_t *local = NULL;
+ char sh_type_str[256] = {0,};
+ char *bg = "";
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ local = frame->local;
+ local->self_heal.background = background;
+ local->self_heal.type = ia_type;
+ local->self_heal.unwind = unwind;
+ local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+
+ afr_self_heal_type_str_get (&local->self_heal,
+ sh_type_str,
+ sizeof (sh_type_str));
+
+ if (background)
+ bg = "background";
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s %s self-heal triggered. path: %s, reason: %s", bg,
+ sh_type_str, local->loc.path, reason);
+
+ afr_self_heal (frame, this, inode);
+}
+
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path)
+{
+ unsigned int gfid_miss_count = 0;
+ int i = 0;
+ struct iatt *child1 = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if (uuid_is_null (child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
+ " on subvolume %d", path, success_children[i]);
+ gfid_miss_count++;
+ }
+ }
+
+ return gfid_miss_count;
+}
+
+static int
+afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+{
+ int32_t *success_children = NULL;
+ afr_private_t *priv = NULL;
+ struct iatt *bufs = NULL;
+ int miss_count = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ miss_count = afr_gfid_missing_count (this->name, success_children,
+ bufs, priv->child_count,
+ local->loc.path);
+ return miss_count;
+}
+
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name)
+{
+ gf_boolean_t conflicting = _gf_false;
+ int i = 0;
+ struct iatt *child1 = NULL;
+ struct iatt *child2 = NULL;
+ uuid_t *gfid = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
+ gfid = &child1->ia_gfid;
+
+ if (i == 0)
+ continue;
+
+ child2 = &bufs[success_children[i-1]];
+ if (FILETYPE_DIFFERS (child1, child2)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
+ "differs on subvolumes (%d, %d)", path,
+ success_children[i-1], success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ if (!gfid || uuid_is_null (child1->ia_gfid))
+ continue;
+ if (uuid_compare (*gfid, child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
+ " on subvolume %d", path, success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return conflicting;
+}
+
+/* afr_update_gfid_from_iatts: This function should be called only if the
+ * iatts are not conflicting.
+ */
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children, unsigned int child_count)
+{
+ uuid_t *gfid = NULL;
+ int i = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
+ gfid = &bufs[child].ia_gfid;
+ } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
+ if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
+ }
+ if (gfid && (!uuid_is_null (*gfid)))
+ uuid_copy (uuid, *gfid);
+out:
+ return;
+}
+
+static gf_boolean_t
+afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t conflict = _gf_false;
+
+ priv = this->private;
+ conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name);
+ return conflict;
+}
+
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal)
+{
+ return !strcmp (data_self_heal, "open");
+}
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal)
+{
+ gf_boolean_t enabled = _gf_false;
+
+ if (gf_string2boolean (data_self_heal, &enabled) == -1) {
+ enabled = !strcmp (data_self_heal, "open");
+ GF_ASSERT (enabled);
+ }
+
+ return enabled;
+}
+
+static void
+afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ struct iatt *bufs = NULL;
+ dict_t **xattr = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child1 = -1;
+ int32_t child2 = -1;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ sh = &local->self_heal;
+
+ afr_detect_self_heal_by_lookup_status (local, this);
+
+ if (afr_lookup_gfid_missing_count (local, this))
+ local->self_heal.do_gfid_self_heal = _gf_true;
+
+ if (_gf_true == afr_lookup_conflicting_entries (local, this))
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ else
+ afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
+ local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count);
+
+ bufs = local->cont.lookup.bufs;
+ for (i = 1; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i-1];
+ child2 = local->cont.lookup.success_children[i];
+ afr_detect_self_heal_by_iatt (local, this,
+ &bufs[child1], &bufs[child2]);
+ }
+
+ xattr = local->cont.lookup.xattrs;
+ for (i = 0; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i];
+ afr_lookup_set_self_heal_params_by_xattr (local, this,
+ xattr[child1]);
+ }
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
+}
+
+int
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
+{
+ afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
+
+ goto out;
+ } else {
+ local->op_ret = 0;
+ }
+
+ afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
+out:
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode, &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+//TODO: At the moment only lookup needs this, so not doing any checks, in the
+// future we will have to do fop specific operations
+void
+afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ struct iatt *lookup_bufs = NULL;
+ struct iatt *lookup_parentbufs = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+ local = sh->orig_frame->local;
+ lookup_bufs = local->cont.lookup.bufs;
+ lookup_parentbufs = local->cont.lookup.postparents;
+ priv = this->private;
+
+ memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
+ memcpy (lookup_parentbufs, sh->parentbufs,
+ priv->child_count * sizeof (*sh->parentbufs));
+
+ afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ local->cont.lookup.xattr = NULL;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i])
+ local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
+ }
+
+ afr_reset_children (local->cont.lookup.success_children,
+ priv->child_count);
+ afr_children_copy (local->cont.lookup.success_children,
+ sh->fresh_children, priv->child_count);
+}
+
+static void
+afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t *sh_launched)
+{
+ unsigned int up_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *reason = NULL;
+
+ GF_ASSERT (sh_launched);
+ *sh_launched = _gf_false;
+ priv = this->private;
+ local = frame->local;
+
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
+ if (up_count == 1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Only 1 child up - do not attempt to detect self heal");
+ goto out;
+ }
+
+ afr_lookup_set_self_heal_params (local, this);
+ if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
+ goto out;
+
+ reason = "lookup detected pending operations";
+ afr_launch_self_heal (frame, this, local->cont.lookup.inode,
+ _gf_true, local->cont.lookup.buf.ia_type,
+ reason, afr_post_gfid_sh_success,
+ afr_self_heal_lookup_unwind);
+ *sh_launched = _gf_true;
+ }
+out:
+ return;
+}
+
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *fresh_children, unsigned int child_count)
+{
+ unsigned int i = 0;
+ unsigned int j = 0;
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (sources);
+ GF_ASSERT (fresh_children);
+
+ afr_reset_children (fresh_children, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ success_children[i])) {
+ fresh_children[j] = success_children[i];
+ j++;
+ }
+ }
+}
+
+static int
+afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+{
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (read_child >= 0);
+
+ priv = this->private;
+ afr_get_fresh_children (local->cont.lookup.success_children,
+ local->cont.lookup.sources,
+ local->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
+ local->fresh_children);
+
+ return 0;
+}
+
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict)
+{
+ int32_t read_child = -1;
+ int32_t ret = -1;
+ afr_local_t *local = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
+
+ local = frame->local;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
+
+ if (local->loc.parent == NULL)
+ fail_conflict = _gf_true;
+
+ if (afr_lookup_conflicting_entries (local, this)) {
+ if (fail_conflict == _gf_false)
+ ret = 0;
+ goto out;
+ }
+
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
+ goto out;
+
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
+
+ ret = 0;
+out:
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ }
+ return ret;
+}
+
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
+ }
+ return lsubvol;
+}
+
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
+ }
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
+
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
+out:
+ return;
+}
+
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat on going
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol, At this stage second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
+{
+ int unwind = 1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t sh_launched = _gf_false;
+ gf_boolean_t fail_conflict = _gf_false;
+ int gfid_miss_count = 0;
+ int enotconn_count = 0;
+ int up_children_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0)
+ goto unwind;
+
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
+ gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ enotconn_count = priv->child_count - up_children_count;
+ if ((gfid_miss_count == local->success_count) &&
+ (enotconn_count > 0)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
+ "LOOKUP on a file without gfid is not allowed when "
+ "some of the children are down", local->loc.path);
+ goto unwind;
+ }
+
+ if ((gfid_miss_count == local->success_count) &&
+ uuid_is_null (local->cont.lookup.gfid_req)) {
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
+ local->loc.path);
+ goto unwind;
+ }
+
+ if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
+ fail_conflict = _gf_true;
+ ret = afr_lookup_done_success_action (frame, this, fail_conflict);
+ if (ret)
+ goto unwind;
+ uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+
+ afr_lookup_perform_self_heal (frame, this, &sh_launched);
+ if (sh_launched) {
+ unwind = 0;
+ goto unwind;
+ }
+
+ unwind:
+ if (unwind) {
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+ }
+}
+
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
+ */
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
+{
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
+}
+
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+ int32_t op_errno = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
+ }
+ return op_errno;
+}
+
+static void
+afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+{
+ GF_ASSERT (local);
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
+
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+}
+
+static void
+afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
+ inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+ GF_ASSERT (inode);
+
+ if (!__is_root_gfid (inode->gfid))
+ goto out;
+ if (!afr_is_fresh_lookup (&local->loc, this))
+ goto out;
+ priv = this->private;
+ if ((priv->first_lookup)) {
+ gf_log (this->name, GF_LOG_INFO, "added root inode");
+ priv->root_inode = inode_ref (inode);
+ priv->first_lookup = 0;
+ }
+out:
+ return;
+}
+
+static void
+afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
+ struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (child_index >= 0);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparents[child_index] = *postparent;
+ local->cont.lookup.bufs[child_index] = *buf;
+}
+
+static void
+afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
+ inode_t *inode, struct iatt *buf)
+{
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.buf = *buf;
+ afr_set_root_inode_on_first_lookup (local, this, inode);
+}
+
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
+static void
+afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_private_t *priv = this->private;
+
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE) {
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ }
+ afr_lookup_handle_first_success (local, this, inode, buf);
+ }
+ afr_lookup_update_lk_counts (local, this,
+ child_index, xattr);
+
+ afr_lookup_cache_args (local, child_index, xattr,
+ buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
+ local->cont.lookup.success_children[local->success_count] = child_index;
+ local->success_count++;
+}
+
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == -1) {
+ afr_lookup_handle_error (local, op_ret, op_errno);
+ goto unlock;
+ }
+ afr_lookup_handle_success (local, this, child_index, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_done (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+{
+ int ret = -ENOMEM;
+ struct iatt *iatts = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
+
+ GF_ASSERT (local);
+ local->cont.lookup.xattrs = GF_CALLOC (child_count,
+ sizeof (*local->cont.lookup.xattr),
+ gf_afr_mt_dict_t);
+ if (NULL == local->cont.lookup.xattrs)
+ goto out;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.postparents = iatts;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.bufs = iatts;
+
+ success_children = afr_children_create (child_count);
+ if (NULL == success_children)
+ goto out;
+ local->cont.lookup.success_children = success_children;
+
+ local->fresh_children = afr_children_create (child_count);
+ if (NULL == local->fresh_children)
+ goto out;
+
+ sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ local->cont.lookup.sources = sources;
+
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ void *gfid_req = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+ uint64_t ctx = 0;
+ int32_t op_errno = 0;
+ int allow_sh = 0;
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ local->op_ret = -1;
+
+ frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
+
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret == 0) {
+ /* lookup is a revalidate */
+
+ local->read_child_index = afr_inode_get_read_ctx (this,
+ local->loc.inode,
+ NULL);
+ } else {
+ LOCK (&priv->read_child_lock);
+ {
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
+ }
+ UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
+ }
+
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ if (NULL == local->child_up) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = afr_lookup_cont_init (local, priv->child_count);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ call_count = local->call_count;
+ if (local->call_count == 0) {
+ ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* By default assume ENOTCONN. On success it will be set to 0. */
+ local->op_errno = ENOTCONN;
+
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
+
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
+ &gfid_req);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+ afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
+ &local->loc);
+ local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* {{{ open */
+
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ priv = this->private;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret == 0)
+ goto out;
+
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
+out:
+ return ret;
+}
+
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+
+ LOCK (&fd->lock);
+ {
+ ret = __afr_fd_ctx_set (this, fd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ret;
+}
+
+/* {{{ flush */
+
+int
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
+
+ return 0;
+}
+
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ local->fd, NULL);
+ if (!--call_count)
+ break;
+
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
+
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* }}} */
+
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+{
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ if (fd_ctx) {
+ GF_FREE (fd_ctx->pre_op_done);
+
+ GF_FREE (fd_ctx->opened_on);
+
+ GF_FREE (fd_ctx->locked_on);
+
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
+
+ GF_FREE (fd_ctx->lock_acquired);
+
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
+
+ GF_FREE (fd_ctx);
+ }
+
+out:
+ return 0;
+}
+
+
+int
+afr_release (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_locked_fd_t *tmp = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ afr_cleanup_fd_ctx (this, fd);
+
+ list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
+ list) {
+
+ if (locked_fd->fd == fd) {
+ list_del_init (&locked_fd->list);
+ GF_FREE (locked_fd);
+ }
+
+ }
+
+ return 0;
+}
+
+
+/* {{{ fsync */
+
+int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (local->success_count == 0) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ local->fd = fd_ref (fd);
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsync_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsync,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fsync */
+
+int32_t
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fsyncdir_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ xattrop */
+
+int32_t
+afr_xattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
+ local->cont.xattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_xattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ loc, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fxattrop */
+
+int32_t
+afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
+ local->cont.fxattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fxattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ fd, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+
+int32_t
+afr_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_inodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_finodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ volume, fd, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_entrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+
+int32_t
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fentrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+afr_statfs_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == 0) {
+ local->op_ret = op_ret;
+
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
+ local->cont.statfs.buf = *statvfs;
+ } else {
+ local->cont.statfs.buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ }
+ }
+
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ int child_count = 0;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_statfs_cbk,
+ priv->children[i],
+ priv->children[i]->fops->statfs,
+ loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ lock, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+ priv->child_count);
+
+ if (call_count == 0) {
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND (frame, afr_lk_unlock_cbk,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ local->fd, F_SETLK,
+ &local->cont.lk.user_flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
+/* int ret = 0; */
+
+
+ local = frame->local;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_lk_unlock (frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk,
+ local->fd, local->cont.lk.cmd,
+ &local->cont.lk.user_flock, xdata);
+ } else if (local->op_ret == -1) {
+ /* all nodes have gone down */
+
+ AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
+ &local->cont.lk.ret_flock, NULL);
+ } else {
+ /* locking has succeeded on all nodes that are up */
+
+ /* temporarily
+ ret = afr_mark_locked_nodes (this, local->fd,
+ local->cont.lk.locked_nodes);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked nodes info in fdctx");
+
+ ret = afr_save_locked_fd (this, local->fd);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked fd");
+
+ */
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ }
+
+ return 0;
+}
+
+
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ fd, cmd, flock, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_get (inode, this, &ctx_addr);
+
+ if (!ctx_addr)
+ goto out;
+
+ ctx = (afr_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+out:
+ return 0;
+}
+
+int
+afr_priv_dump (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf (key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf (key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+ gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+ gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+
+ return 0;
+}
+
+
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((xlator_t *) child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
+
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ had_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ /* parent xlators dont need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+ "from invalid subvolume");
+ goto out;
+ }
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ LOCK (&priv->lock);
+ {
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
+ priv->child_up[idx] = 1;
+
+ call_psh = 1;
+ up_child = idx;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_CONNECTING:
+ LOCK (&priv->lock);
+ {
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
+ break;
+
+ default:
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+
+ LOCK (&priv->lock);
+ {
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
+
+out:
+ return ret;
+}
+
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+
+ for (i = 0; i < child_count; i++) {
+ if (child_up[i]) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int ret = -1;
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy (local->child_up, priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->child_errno = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_errno),
+ gf_afr_mt_int32_t);
+ if (!local->child_errno) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type)
+{
+ int ret = -ENOMEM;
+
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+
+ lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
+
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
+ lk->transaction_lk_type = lk_type;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+ int i = 0;
+
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE (matrix[i]);
+ }
+
+ GF_FREE (matrix);
+out:
+ return;
+}
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
+ gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup (matrix, m);
+ return NULL;
+}
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+{
+ int child_up_count = 0;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
+ AFR_TRANSACTION_LK);
+ if (ret < 0)
+ goto out;
+
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ child_up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (priv->optimistic_change_log && child_up_count == priv->child_count)
+ local->optimistic_change_log = 1;
+
+ local->first_up_child = afr_first_up_child (local->child_up,
+ priv->child_count);
+
+ local->transaction.eager_lock =
+ GF_CALLOC (sizeof (*local->transaction.eager_lock),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+
+ if (!local->transaction.eager_lock)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->transaction.txn_changelog)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count)
+{
+ unsigned int i = 0;
+ for (i = 0; i < child_count; i++)
+ fresh_children[i] = -1;
+}
+
+int32_t*
+afr_children_create (int32_t child_count)
+{
+ int32_t *children = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_count > 0);
+
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
+ goto out;
+ for (i = 0; i < child_count; i++)
+ children[i] = -1;
+out:
+ return children;
+}
+
+void
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
+{
+ gf_boolean_t child_found = _gf_false;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ child_found = _gf_true;
+ break;
+ }
+ }
+
+ if (!child_found) {
+ GF_ASSERT (i < child_count);
+ children[i] = child;
+ }
+}
+
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ if (i != (child_count - 1))
+ memmove (children + i, children + i + 1,
+ sizeof (*children)*(child_count - i - 1));
+ children[child_count - 1] = -1;
+ break;
+ }
+ }
+}
+
+int
+afr_get_children_count (int32_t *children, unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ count++;
+ }
+ return count;
+}
+
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags)
+{
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ GF_ASSERT (fd && fd->inode);
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ fd_ctx->opened_on[child] = AFR_FD_OPENED;
+ if (!IA_ISDIR (fd->inode->ia_type)) {
+ fd_ctx->flags = flags;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+ unsigned int quorum = 0;
+
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
+
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
+
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
+ }
+ }
+
+out:
+ return _gf_false;
+}
+
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+ int i = 0;
+
+ if (!priv)
+ goto out;
+ inode_unref (priv->root_inode);
+ GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
+ for (i = 0; i < priv->child_count; i++)
+ {
+ if (priv->shd.statistics[i])
+ eh_destroy (priv->shd.statistics[i]);
+ }
+
+ GF_FREE (priv->shd.statistics);
+
+ GF_FREE (priv->shd.crawl_events);
+
+ GF_FREE (priv->last_event);
+ if (priv->pending_key) {
+ for (i = 0; i < priv->child_count; i++)
+ GF_FREE (priv->pending_key[i]);
+ }
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->children);
+ GF_FREE (priv->child_up);
+ LOCK_DESTROY (&priv->lock);
+ LOCK_DESTROY (&priv->read_child_lock);
+ pthread_mutex_destroy (&priv->mutex);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+xlator_subvolume_count (xlator_t *this)
+{
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
+}
+
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+ return child_errno[child];
+}
+
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+ return !afr_is_errno_set (child_errno, child);
+}
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count)
+{
+ int midx = 0;
+ int idx = 0;
+ int i = 0;
+
+ midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (IA_ISDIR (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ else if (IA_ISREG (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ else
+ idx = -1;
+ for (i = 0; i < child_count; i++) {
+ if (is_pending (ctx, i)) {
+ pending[i][midx] = hton32 (1);
+ if (idx == -1)
+ continue;
+ pending[i][idx] = hton32 (1);
+ }
+ }
+}
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ inode_t *inode = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode)
+ return;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ ctx->open_fd_count = local->open_fd_count;
+ }
+ UNLOCK (&inode->lock);
+}
+
+int
+afr_initialise_statistics (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int i = 0;
+ int child_count = 0;
+ eh_t *stats_per_brick = NULL;
+ shd_crawl_event_t ***shd_crawl_events = NULL;
+ priv = this->private;
+
+ priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!priv->shd.statistics) {
+ ret = -1;
+ goto out;
+ }
+ child_count = priv->child_count;
+ for (i=0; i < child_count ; i++) {
+ stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ _destroy_crawl_event_data);
+ if (!stats_per_brick) {
+ ret = -1;
+ goto out;
+ }
+ priv->shd.statistics[i] = stats_per_brick;
+
+ }
+
+ shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+ *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+ priv->child_count,
+ gf_afr_mt_shd_crawl_event_t);
+
+ if (!priv->shd.crawl_events) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f1638c740..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,6 +15,7 @@
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
+#include <string.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -42,293 +34,512 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "checksum.h"
#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+int
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ afr_set_opendir_done (this, local->fd->inode);
- int call_count = -1;
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ return 0;
+}
- if (op_ret == 0)
- local->op_ret = 0;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+gf_boolean_t
+__checksums_differ (uint32_t *checksum, int child_count,
+ unsigned char *child_up)
+{
+ int ret = _gf_false;
+ int i = 0;
+ uint32_t cksum = 0;
+ gf_boolean_t activate_check = _gf_false;
+
+ for (i = 0; i < child_count; i++) {
+ if (!child_up[i])
+ continue;
+ if (_gf_false == activate_check) {
+ cksum = checksum[i];
+ activate_check = _gf_true;
+ continue;
+ }
- call_count = afr_frame_return (frame);
+ if (cksum != checksum[i]) {
+ ret = _gf_true;
+ break;
+ }
- if (call_count == 0) {
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ cksum = checksum[i];
+ }
- return 0;
+ return ret;
}
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+int32_t
+afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
+ int child_index = 0;
+ uint32_t entry_cksum = 0;
+ int call_count = 0;
+ off_t last_offset = 0;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
- int child_count = 0;
- int i = 0;
+ child_index = (long) cookie;
- int ret = -1;
- int call_count = -1;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to do opendir on %s",
+ local->loc.path, priv->children[child_index]->name);
+ local->op_ret = -1;
+ local->op_ret = op_errno;
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: no entries found in %s",
+ local->loc.path, priv->children[child_index]->name);
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
+ strlen (entry->d_name));
+ local->cont.opendir.checksum[child_index] ^= entry_cksum;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ }
+
+ /* read more entries */
+
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readdir,
+ local->fd, 131072, last_offset, NULL);
+
+ return 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+out:
+ call_count = afr_frame_return (frame);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (call_count == 0) {
+ if (__checksums_differ (local->cont.opendir.checksum,
+ priv->child_count,
+ local->child_up)) {
- priv = this->private;
+ sh->do_entry_self_heal = _gf_true;
+ sh->forced_merge = _gf_true;
- child_count = priv->child_count;
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
+ } else {
+ afr_set_opendir_done (this, inode);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+ }
+ }
- frame->local = local;
- local->fd = fd_ref (fd);
+ return 0;
+}
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
- if (!--call_count)
- break;
- }
- }
+int
+afr_examine_dir (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int call_count = 0;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
+ local = frame->local;
+ priv = this->private;
- return 0;
-}
+ local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.opendir.checksum),
+ gf_afr_mt_int32_t);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->readdir,
+ local->fd, 131072, 0, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_opendir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t up_children_count = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t child_index = 0;
+
+ priv = this->private;
+ local = frame->local;
+ child_index = (long) cookie;
- gf_dirent_t * entry = NULL;
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
- int child_index = -1;
+ local->op_errno = op_errno;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- priv = this->private;
- children = priv->children;
+ call_count = afr_frame_return (frame);
- local = frame->local;
+ if (call_count == 0) {
+ if (local->op_ret != 0)
+ goto out;
- child_index = (long) cookie;
+ if (!afr_is_opendir_done (this, local->fd->inode) &&
+ up_children_count > 1 && priv->entry_self_heal) {
- if (op_ret != -1) {
- list_for_each_entry (entry, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
- }
- }
+ /*
+ * This is the first opendir on this inode. We need
+ * to check if the directory's entries are the same
+ * on all subvolumes. This is needed in addition
+ * to regular entry self-heal because the readdir
+ * call is sent only to the first subvolume, and
+ * thus files that exist only there will never be healed
+ * otherwise (assuming changelog shows no anomalies).
+ */
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entries);
+ gf_log (this->name, GF_LOG_TRACE,
+ "reading contents of directory %s looking for mismatch",
+ local->loc.path);
- return 0;
-}
+ afr_examine_dir (frame, this);
+ } else {
+ /* do the unwind */
+ goto out;
+ }
+ }
+
+ return 0;
-int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
-{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.offset = offset;
-
- STACK_WIND_COOKIE (frame, afr_readdir_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir,
- fd, size, offset);
-
- op_ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+
+ return 0;
}
int32_t
-afr_getdents_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dir_entry_t *entry, int32_t count)
+afr_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int child_count = 0;
+ int i = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t op_errno = 0;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
- children = priv->children;
+ priv = this->private;
- local = frame->local;
+ child_count = priv->child_count;
- if (op_ret == -1) {
- last_tried = local->cont.getdents.last_tried;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- this_try = ++local->cont.getdents.last_tried;
- unwind = 0;
+ loc_copy (&local->loc, loc);
- STACK_WIND (frame, afr_getdents_cbk,
- children[this_try],
- children[this_try]->fops->getdents,
- local->fd, local->cont.getdents.size,
- local->cont.getdents.offset, local->cont.getdents.flag);
- }
+ local->fd = fd_ref (fd);
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
-int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag)
+/**
+ * Common algorithm for directory read calls:
+ *
+ * - Try the fop on the first child that is up
+ * - if we have failed due to ENOTCONN:
+ * try the next child
+ *
+ * Applicable to: readdir
+ */
+
+
+struct entry_name {
+ char *name;
+ struct list_head list;
+};
+
+static void
+afr_forget_entries (fd_t *fd)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ struct entry_name *entry = NULL;
+ struct entry_name *tmp = NULL;
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ ret = fd_ctx_get (fd, THIS, &ctx);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "could not get fd ctx for fd=%p", fd);
+ return;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
+ GF_FREE (entry->name);
+ list_del (&entry->list);
+ GF_FREE (entry);
+ }
+}
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
+{
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ list_del_init (&entry->list);
+ GF_FREE (entry);
+ }
+ }
+}
+
+int32_t
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (op_ret == -1)
+ goto out;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up.");
- goto out;
- }
- local->cont.getdents.last_tried = call_child;
+int32_t
+afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- local->fd = fd_ref (fd);
+ if (op_ret == -1)
+ goto out;
- local->cont.getdents.size = size;
- local->cont.getdents.offset = offset;
- local->cont.getdents.flag = flag;
-
- frame->local = local;
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- STACK_WIND (frame, afr_getdents_cbk,
- children[call_child], children[call_child]->fops->getdents,
- fd, size, offset, flag);
+out:
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- op_ret = 0;
+int32_t
+afr_do_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ children = priv->children;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
+
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readdir, fd,
+ size, offset, dict);
+ else
+ STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readdirp, fd,
+ size, offset, dict);
+
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
- return 0;
+int32_t
+afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ return 0;
}
+int32_t
+afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+ return 0;
+}
+
+
+int32_t
+afr_releasedir (xlator_t *this, fd_t *fd)
+{
+ afr_forget_entries (fd);
+ afr_cleanup_fd_ctx (this, fd);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6d981fdfd..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,25 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
-afr_closedir (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
+afr_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index b07ac3c48..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,268 +38,470 @@
#include "afr.h"
#include "afr-transaction.h"
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+{
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
+
+ return ret;
+}
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
- char *tmp = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
- if (!child->parent) {
- loc_copy (parent, child);
- return;
- }
+ local->cont.dir_fop.inode = inode;
- tmp = strdup (child->path);
- parent->path = strdup (dirname (tmp));
- FREE (tmp);
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
+ }
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ local->op_errno = op_errno;
+}
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
}
-/* {{{ create */
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- &local->cont.create.buf);
- }
-
- return 0;
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
+
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
+}
+
+/* {{{ create */
int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- int ret = 0;
+ local = frame->local;
- int call_count = -1;
- int child_index = -1;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (create, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.create.fd,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
+ }
- local = frame->local;
- priv = this->private;
+ return 0;
+}
- child_index = (long) cookie;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- ret = afr_fd_ctx_set (this, fd);
+int
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = -1;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set ctx on fd=%p", fd);
+ local = frame->local;
- local->op_ret = -1;
- local->op_errno = -ret;
- }
+ child_index = (long) cookie;
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ LOCK (&frame->lock);
+ {
+ if (op_ret > -1) {
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set ctx on fd=%p", fd);
+
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
- local->cont.create.buf.st_ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
}
- local->cont.create.inode = inode;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ fd_ctx->flags = local->cont.create.flags;
- local->success_count++;
- }
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
+ }
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+unlock:
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_create_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->create,
+ &local->loc,
+ local->cont.create.flags,
+ local->cont.create.mode,
+ local->umask,
+ local->cont.create.fd,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_create_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+ priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ QUORUM_CHECK(create,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
-
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_create_wind;
+ local->transaction.done = afr_create_done;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -318,220 +511,201 @@ out:
int
afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- &local->cont.mknod.buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mknod, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.buf.st_ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ int call_count = -1;
+ int child_index = -1;
- if (child_index == local->read_child_index) {
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- local->cont.mknod.inode = inode;
+ child_index = (long) cookie;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_mknod_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_mknod_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+ priv = this->private;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ QUORUM_CHECK(mknod,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_mknod_wind;
+ local->transaction.done = afr_mknod_done;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -542,220 +716,201 @@ out:
int
afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- &local->cont.mkdir.buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mkdir, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
- local->cont.mkdir.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ int call_count = -1;
+ int child_index = -1;
- local->cont.mkdir.inode = inode;
+ child_index = (long) cookie;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mkdir,
+ &local->loc, local->cont.mkdir.mode,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_mkdir_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(mkdir,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mkdir.mode = mode;
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.fop = afr_mkdir_wind;
+ local->transaction.done = afr_mkdir_done;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -766,222 +921,196 @@ out:
int
afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.link.buf.st_ino = local->cont.link.ino;
-
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.link.inode,
- &local->cont.link.buf);
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (link, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- local->cont.link.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ int call_count = -1;
+ int child_index = -1;
- local->cont.link.inode = inode;
+ child_index = (long) cookie;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_link_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->link,
+ &local->loc,
+ &local->newloc, local->xdata_req);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_link_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(link,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.link.ino = oldloc->inode->ino;
-
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ local->op = GF_FOP_LINK;
+ local->transaction.fop = afr_link_wind;
+ local->transaction.done = afr_link_done;
+ local->transaction.unwind = afr_link_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -992,221 +1121,202 @@ out:
int
afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- &local->cont.symlink.buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (symlink, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.symlink.buf = *buf;
- local->cont.symlink.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ int call_count = -1;
+ int child_index = -1;
- if (child_index == local->read_child_index) {
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ child_index = (long) cookie;
- local->cont.symlink.inode = inode;
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_symlink_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int call_count = -1;
- int i = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->call_count = call_count;
- local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->symlink,
+ local->cont.symlink.linkpath,
+ &local->loc,
+ local->umask,
+ local->xdata_req);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc);
+ if (!--call_count)
+ break;
- if (!--call_count)
- break;
+ }
+ }
- }
- }
-
- return 0;
+ return 0;
}
int
afr_symlink_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
- local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return 0;
}
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(symlink,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.symlink.ino = loc->inode->ino;
- local->cont.symlink.linkpath = strdup (linkpath);
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->cont.symlink.linkpath = gf_strdup (linkpath);
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.fop = afr_symlink_wind;
+ local->transaction.done = afr_symlink_done;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1216,194 +1326,231 @@ out:
int
afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (main_frame) {
- local->cont.rename.buf.st_ino = local->cont.rename.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.rename.buf);
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (rename, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if ((op_ret != -1) && (local->success_count == 0)) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- local->cont.rename.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
- }
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- call_count = afr_frame_return (frame);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ child_index = (long) cookie;
+ LOCK (&frame->lock);
+ {
+ if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
+ }
+ UNLOCK (&frame->lock);
-int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ call_count = afr_frame_return (frame);
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return 0;
}
-int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+int32_t
+afr_rename_wind (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int ret = -1;
+ local = frame->local;
+ priv = this->private;
- int op_ret = -1;
- int op_errno = 0;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rename,
+ &local->loc,
+ &local->newloc, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ return 0;
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+int
+afr_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
+ local->transaction.unwind (frame, this);
- local->cont.rename.ino = oldloc->inode->ino;
+ AFR_STACK_DESTROY (frame);
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
+ return 0;
+}
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
+int
+afr_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(rename,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
+
+ local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.fop = afr_rename_wind;
+ local->transaction.done = afr_rename_done;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1413,182 +1560,190 @@ out:
int
afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame)
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (unlink, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- call_count = afr_frame_return (frame);
+ LOCK (&frame->lock);
+ {
+ if (child_index == local->read_child_index) {
+ local->read_child_returned = _gf_true;
+ }
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_unlink_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+ local = frame->local;
+ priv = this->private;
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->transaction.unwind (frame, this);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->unlink,
+ &local->loc, local->xflag,
+ local->xdata_req);
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ if (!--call_count)
+ break;
+ }
+ }
- int ret = -1;
+ return 0;
+}
- int op_ret = -1;
- int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int32_t
+afr_unlink_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- priv = this->private;
+ local->transaction.unwind (frame, this);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ AFR_STACK_DESTROY (frame);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ return 0;
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+int32_t
+afr_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- loc_copy (&local->loc, loc);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
+ priv = this->private;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ QUORUM_CHECK(unlink,out);
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.fop = afr_unlink_wind;
+ local->transaction.done = afr_unlink_done;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
+ NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1600,323 +1755,208 @@ out:
int
afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame)
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (rmdir, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int read_child = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count)
- need_unwind = 1;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+ if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
+ afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
- call_count = afr_frame_return (frame);
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-/* }}} */
-
-/* {{{ setdents */
-
-int32_t
-afr_setdents_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int call_count = -1;
- int child_index = (long) cookie;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if ((op_ret != -1) && (local->success_count == 0)) {
- local->op_ret = op_ret;
- local->success_count++;
- }
+ local->call_count = call_count;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags,
+ NULL);
- call_count = afr_frame_return (frame);
+ if (!--call_count)
+ break;
+ }
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int32_t
-afr_setdents_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rmdir_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setdents_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setdents,
- local->fd, local->cont.setdents.flags,
- local->cont.setdents.entries,
- local->cont.setdents.count);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_local_t * local = frame->local;
+ local->transaction.unwind (frame, this);
-int32_t
-afr_setdents_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ AFR_STACK_DESTROY (frame);
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
+ return 0;
}
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count)
+int
+afr_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- local->fd = fd_ref (fd);
-
- local->cont.setdents.flags = flags;
- local->cont.setdents.entries = entries;
- local->cont.setdents.count = count;
-
- local->transaction.fop = afr_setdents_wind;
- local->transaction.done = afr_setdents_done;
-
- local->transaction.basename = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(rmdir,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (frame, this, AFR_ENTRY_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.rmdir.flags = flags;
+ loc_copy (&local->loc, loc);
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.fop = afr_rmdir_wind;
+ local->transaction.done = afr_rmdir_done;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 76b4b60fa..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -22,38 +13,35 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd);
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
-int32_t
+int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 4819d06cd..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,9 +35,6 @@
#include "compat-errno.h"
#include "compat.h"
-#include "afr.h"
-
-
/**
* Common algorithm for inode read calls:
*
@@ -61,114 +49,115 @@
int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.access.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.access.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.access.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->access,
- &local->loc, local->cont.access.mask);
- }
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->access,
+ &local->loc, local->cont.access.mask,
+ NULL);
+ }
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ children = priv->children;
- children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- read_child = afr_read_child (this, loc->inode);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (read_child >= 0) {
- call_child = read_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.access.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.access.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.access.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->access,
- loc, mask);
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->access,
+ loc, mask, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
}
@@ -178,121 +167,111 @@ out:
int32_t
afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- read_child = (long) cookie;
-
- local = frame->local;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ children = priv->children;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.stat.last_tried;
+ read_child = (long) cookie;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.stat.last_tried;
+ local = frame->local;
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.stat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->stat,
- &local->loc);
- }
+ STACK_WIND_COOKIE (frame, afr_stat_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->stat,
+ &local->loc, NULL);
+ }
out:
- if (unwind) {
- if (op_ret != -1)
- buf->st_ino = local->cont.stat.ino;
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
int32_t read_child = -1;
- int call_child = 0;
+ int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ children = priv->children;
- children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- frame->local = local;
-
- read_child = afr_read_child (this, loc->inode);
-
- if (read_child >= 0) {
- call_child = read_child;
-
- local->cont.stat.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.stat.last_tried = call_child;
- }
-
- loc_copy (&local->loc, loc);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.stat.ino = loc->inode->ino;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.stat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ loc_copy (&local->loc, loc);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc);
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->stat,
+ loc, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
@@ -301,124 +280,122 @@ out:
/* {{{ fstat */
int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
-
- priv = this->private;
- children = priv->children;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- local = frame->local;
+ priv = this->private;
+ children = priv->children;
- read_child = (long) cookie;
+ local = frame->local;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.fstat.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.fstat.last_tried;
+ read_child = (long) cookie;
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.fstat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->fstat,
- local->fd);
- }
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fstat,
+ local->fd, NULL);
+ }
out:
- if (unwind) {
- if (op_ret != -1)
- buf->st_ino = local->cont.fstat.ino;
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int call_child = 0;
- int32_t read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = 0;
+ int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ children = priv->children;
- children = priv->children;
+ VALIDATE_OR_GOTO (fd->inode, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_FD (fd, out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- VALIDATE_OR_GOTO (fd->inode, out);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- read_child = afr_read_child (this, fd->inode);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (read_child >= 0) {
- call_child = read_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
- local->cont.fstat.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
- local->cont.fstat.last_tried = call_child;
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.fstat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- local->cont.fstat.ino = fd->inode->ino;
- local->fd = fd_ref (fd);
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd);
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->fstat,
+ fd, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
@@ -427,117 +404,115 @@ out:
int32_t
afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ priv = this->private;
+ children = priv->children;
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readlink.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readlink.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.readlink.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readlink,
- &local->loc,
- local->cont.readlink.size);
- }
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->readlink,
+ &local->loc,
+ local->cont.readlink.size, NULL);
+ }
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
+ xdata);
+ }
- return 0;
+ return 0;
}
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- frame->local = local;
+ children = priv->children;
- read_child = afr_read_child (this, loc->inode);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- if (read_child >= 0) {
- call_child = read_child;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->cont.readlink.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
-
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.readlink.last_tried = call_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
}
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readlink.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ loc_copy (&local->loc, loc);
- loc_copy (&local->loc, loc);
- local->cont.readlink.size = size;
+ local->cont.readlink.size = size;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->readlink,
- loc, size);
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readlink,
+ loc, size, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
@@ -551,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -561,25 +536,25 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
if (!strncmp (key, AFR_XATTR_PREFIX,
strlen (AFR_XATTR_PREFIX))) {
- xkey = CALLOC (1, sizeof (*xkey));
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
void
__filter_xattrs (dict_t *dict)
{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
+ struct list_head keys = {0,};
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
INIT_LIST_HEAD (&keys);
@@ -591,7 +566,7 @@ __filter_xattrs (dict_t *dict)
list_del_init (&key->list);
- FREE (key);
+ GF_FREE (key);
}
}
@@ -599,127 +574,1222 @@ __filter_xattrs (dict_t *dict)
int32_t
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
+ if (xattr)
+ dict_unref (xattr);
+ }
- if (this_try == read_child) {
- goto retry;
+ return ret;
+}
+
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
}
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name);
- }
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
-out:
- if (unwind) {
- __filter_xattrs (dict);
+ if (xattr)
+ dict_unref (xattr);
+ }
- AFR_STACK_UNWIND (frame, op_ret, op_errno, dict);
- }
+ return ret;
+}
- return 0;
+/**
+ * node-uuid cbk uses next child querying mechanism
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all childs and yet no success; give up !
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
}
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
+ return 0;
+}
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t * local = NULL;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- int read_child = -1;
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ call_cnt = --local->call_count;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
- children = priv->children;
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
- op_errno = ENODATA;
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+int32_t
+afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
+ }
+ }
+ out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_min_stime (THIS, data, key, value);
+
+ return ret;
+}
+
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
}
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
}
- read_child = afr_read_child (this, loc->inode);
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
+ }
- if (read_child >= 0) {
- call_child = read_child;
+out:
+ return 0;
+}
+
+
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT (cbk);
+ if (!cbk) {
+ is_spl = _gf_false;
+ goto out;
+ }
- local->cont.getxattr.last_tried = -1;
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
} else {
- call_child = afr_first_up_child (priv);
+ is_spl = _gf_false;
+ }
+
+out:
+ return is_spl;
+}
+
+static void
+afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->getxattr,
+ loc, name, NULL);
+ }
+ return;
+}
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
+ loc_copy (&local->loc, loc);
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
+
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
+ goto out;
+ }
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
+ op_errno = EINVAL;
goto out;
}
- local->cont.getxattr.last_tried = call_child;
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all childs
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_frm_all_children (this, frame, name,
+ loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
+ priv->vol_uuid)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ return 0;
+ }
+ }
+
+no_name:
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
}
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = strdup (name);
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name);
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->getxattr,
+ loc, name, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* {{{ fgetxattr */
+
+
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fgetxattr,
+ local->fd,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
+ }
+
+ return;
+}
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ frame->local = local;
+
+ op_ret = afr_local_init (local, priv, &op_errno);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->fgetxattr,
+ fd, name, xdata);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
+ }
+ return 0;
}
@@ -741,136 +1811,130 @@ out:
int32_t
afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *buf,
- struct iobref *iobref)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t read_child = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readv.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readv.last_tried;
-
- if (this_try == read_child) {
- /*
- skip the read child since if we are here
- we must have already tried that child
- */
- goto retry;
- }
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset);
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.readv.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset,
+ local->cont.readv.flags,
+ NULL);
+ }
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf,
- iobref);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
int32_t read_child = -1;
- int call_child = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ int ret = -1;
- priv = this->private;
- children = priv->children;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ priv = this->private;
+ children = priv->children;
- frame->local = local;
+ AFR_SBRAIN_CHECK_FD (fd, out);
- read_child = afr_read_child (this, fd->inode);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- if (read_child >= 0) {
- call_child = read_child;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- /*
- if read fails from the read child, we try
- all children starting with the first one
- */
- local->cont.readv.last_tried = -1;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readv.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->cont.readv.last_tried = call_child;
- }
+ local->fd = fd_ref (fd);
- local->fd = fd_ref (fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
+ afr_open_fd_fix (fd, this);
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset);
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readv,
+ fd, size, offset, flags, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
- }
- return 0;
+ if (ret < 0) {
+ AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
+ NULL, NULL);
+ }
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 1127cb652..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 57fdf20d5..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,1967 +37,2825 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+ xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (*op_ret, *op_errno)) {
+ local->child_errno[child_index] = *op_errno;
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ if (*op_errno != EFBIG)
+ afr_transaction_fop_failed (frame, this,
+ child_index);
+ break;
+ default:
+ afr_transaction_fop_failed (frame, this, child_index);
+ break;
+ }
+ local->op_errno = *op_errno;
+ goto out;
+ }
+
+ if ((local->success_count == 0) || (read_child == child_index)) {
+ local->op_ret = *op_ret;
+ if (prebuf)
+ local->cont.inode_wfop.prebuf = *prebuf;
+ if (postbuf)
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+out:
+ return;
+}
-/* {{{ chmod */
-
+/* {{{ writev */
-int
-afr_chmod_unwind (call_frame_t *frame, xlator_t *this)
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
- local = frame->local;
- priv = this->private;
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+}
- if (main_frame) {
- local->cont.chmod.buf.st_ino = local->cont.chmod.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.chmod.buf);
- }
- return 0;
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ local = frame->local;
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
}
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ return fop_frame;
+}
int
-afr_chmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ call_frame_t *fop_frame = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ fop_frame = afr_transaction_detach_fop_frame (frame);
- local = frame->local;
- priv = this->private;
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
+ }
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.chmod.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- afr_chmod_unwind (frame, this);
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+ }
- call_count = afr_frame_return (frame);
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
+ }
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+int
+afr_writev_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
}
-
- return 0;
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ local->fd,
+ local->cont.writev.vector,
+ local->cont.writev.count,
+ local->cont.writev.offset,
+ local->cont.writev.flags,
+ local->cont.writev.iobref,
+ xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
int
-afr_chmod_wind (call_frame_t *frame, xlator_t *this)
+afr_writev_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ iobref_unref (local->cont.writev.iobref);
+ local->cont.writev.iobref = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->transaction.unwind (frame, this);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_chmod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chmod,
- &local->loc,
- local->cont.chmod.mode);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
int
-afr_chmod_done (call_frame_t *frame, xlator_t *this)
+afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+
+ local->op = GF_FOP_WRITE;
+
+ local->success_count = 0;
+
+ local->transaction.fop = afr_writev_wind;
+ local->transaction.done = afr_writev_done;
+ local->transaction.unwind = afr_transaction_writev_unwind;
+
+ local->transaction.main_frame = frame;
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency gurantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length (local->cont.writev.vector,
+ local->cont.writev.count);
+ }
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+ }
- local->transaction.unwind (frame, this);
+ return 0;
+}
- AFR_STACK_DESTROY (frame);
-
- return 0;
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char *reason = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+ "fd: %p, inode: %p", fd,
+ fd ? fd->inode : NULL);
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, this->private, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ sh = &local->self_heal;
+ sh->do_metadata_self_heal = _gf_true;
+ if (fd->inode->ia_type == IA_IFREG)
+ sh->do_data_self_heal = _gf_true;
+ else if (fd->inode->ia_type == IA_IFDIR)
+ sh->do_entry_self_heal = _gf_true;
+
+ reason = "subvolume came online";
+ afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+ fd->inode->ia_type, reason, NULL, NULL);
+ return;
+out:
+ AFR_STACK_DESTROY (frame);
}
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
+{
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ size_t need_open_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!afr_is_fd_fixable (fd))
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ LOCK (&fd->lock);
+ {
+ if (fd_ctx->up_count < priv->up_count) {
+ need_self_heal = _gf_true;
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+ }
+
+ need_open = alloca (priv->child_count * sizeof (*need_open));
+ for (i = 0; i < priv->child_count; i++) {
+ need_open[i] = 0;
+ if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+ continue;
+
+ if (!priv->child_up[i])
+ continue;
+
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+
+ need_open[i] = 1;
+ need_open_count++;
+ }
+ }
+ UNLOCK (&fd->lock);
+ if (ret)
+ goto out;
+
+ if (need_self_heal)
+ afr_trigger_open_fd_self_heal (fd, this);
+
+ if (!need_open_count)
+ goto out;
+
+ afr_fix_open (this, fd, need_open_count, need_open);
+out:
+ return;
+}
-int32_t
-afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+int
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- priv = this->private;
+ QUORUM_CHECK(writev,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.writev.vector = iov_dup (vector, count);
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref (iobref);
- local->cont.chmod.mode = mode;
- local->cont.chmod.ino = loc->inode->ino;
+ local->fd = fd_ref (fd);
- local->transaction.fop = afr_chmod_wind;
- local->transaction.done = afr_chmod_done;
- local->transaction.unwind = afr_chmod_unwind;
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_writev (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-/* }}} */
+/* }}} */
-/* {{{ fchmod */
+/* {{{ truncate */
int
-afr_fchmod_unwind (call_frame_t *frame, xlator_t *this)
+afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.fchmod.buf.st_ino = local->cont.fchmod.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.fchmod.buf);
- }
- return 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+
+ return 0;
}
int
-afr_fchmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ int call_count = -1;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fchmod.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ if (op_ret != -1) {
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+ }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- if (need_unwind)
- afr_fchmod_unwind (frame, this);
+ if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+ local->transaction.resume (frame, this);
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int
-afr_fchmod_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_truncate_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fchmod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fchmod,
- local->fd,
- local->cont.fchmod.mode);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ local->stable_write = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->truncate,
+ &local->loc,
+ local->cont.truncate.offset,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_fchmod_done (call_frame_t *frame, xlator_t *this)
+afr_truncate_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
-int32_t
-afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode)
+int
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(truncate,out);
- priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.truncate.offset = offset;
- local->cont.fchmod.mode = mode;
- local->cont.fchmod.ino = fd->inode->ino;
+ local->transaction.fop = afr_truncate_wind;
+ local->transaction.done = afr_truncate_done;
+ local->transaction.unwind = afr_truncate_unwind;
- local->transaction.fop = afr_fchmod_wind;
- local->transaction.done = afr_fchmod_done;
- local->transaction.unwind = afr_fchmod_unwind;
+ loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
+
/* }}} */
-/* {{{ chown */
+/* {{{ ftruncate */
+
int
-afr_chown_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.chown.buf.st_ino = local->cont.chown.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.chown.buf);
- }
- return 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
}
int
-afr_chown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int read_child = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.chown.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (op_ret != -1) {
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+ }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+ }
+ UNLOCK (&frame->lock);
- if (need_unwind) {
- local->transaction.unwind (frame, this);
- }
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
int
-afr_chown_wind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ local->stable_write = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->ftruncate,
+ local->fd,
+ local->cont.ftruncate.offset,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- int call_count = -1;
- int i = 0;
- local = frame->local;
- priv = this->private;
+int
+afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->transaction.unwind (frame, this);
- local->call_count = call_count;
+ AFR_STACK_DESTROY (frame);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_chown_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chown,
- &local->loc, local->cont.chown.uid,
- local->cont.chown.gid);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
-afr_chown_done (call_frame_t *frame, xlator_t *this)
+afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ transaction_frame->local = local;
+ frame->local = NULL;
- return 0;
-}
+ local->op = GF_FOP_FTRUNCATE;
+ local->transaction.fop = afr_ftruncate_wind;
+ local->transaction.done = afr_ftruncate_done;
+ local->transaction.unwind = afr_ftruncate_unwind;
-int
-afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- int ret = -1;
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- int op_ret = -1;
- int op_errno = 0;
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
+int
+afr_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ priv = this->private;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(ftruncate,out);
- transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->cont.chown.uid = uid;
- local->cont.chown.gid = gid;
- local->cont.chown.ino = loc->inode->ino;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->transaction.fop = afr_chown_wind;
- local->transaction.done = afr_chown_done;
- local->transaction.unwind = afr_chown_unwind;
+ local->cont.ftruncate.offset = offset;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_ftruncate (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-
/* }}} */
-/* {{{ chown */
+/* {{{ setattr */
int
-afr_fchown_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.fchown.buf.st_ino = local->cont.fchown.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.fchown.buf);
- }
- return 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+
+ return 0;
}
int
-afr_fchown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ int call_count = -1;
+ int need_unwind = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fchown.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if (need_unwind) {
- local->transaction.unwind (frame, this);
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
-int
-afr_fchown_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fchown_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fchown,
- local->fd, local->cont.fchown.uid,
- local->cont.fchown.gid);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+int32_t
+afr_setattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc,
+ &local->cont.setattr.in_buf,
+ local->cont.setattr.valid,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_fchown_done (call_frame_t *frame, xlator_t *this)
+afr_setattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid)
+afr_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(setattr,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
- local->cont.fchown.uid = uid;
- local->cont.fchown.gid = gid;
- local->cont.fchown.ino = fd->inode->ino;
+ local->transaction.fop = afr_setattr_wind;
+ local->transaction.done = afr_setattr_done;
+ local->transaction.unwind = afr_setattr_unwind;
- local->transaction.fop = afr_fchown_wind;
- local->transaction.done = afr_fchown_done;
- local->transaction.unwind = afr_fchown_unwind;
+ loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
-
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ writev */
+/* {{{ fsetattr */
int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.writev.buf.st_ino = local->cont.writev.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.writev.buf);
- }
- return 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+
+ return 0;
}
int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ int call_count = -1;
+ int need_unwind = 0;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
-int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.iobref);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetattr,
+ local->fd,
+ &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- priv = this->private;
+ QUORUM_CHECK(fsetattr,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
- local->op = GF_FOP_WRITE;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
- local->cont.writev.iobref = iobref_ref (iobref);
+ local->transaction.fop = afr_fsetattr_wind;
+ local->transaction.done = afr_fsetattr_done;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->fd = fd_ref (fd);
- local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
- local->transaction.main_frame = frame;
- if (fd->flags & O_APPEND) {
- local->transaction.start = 0;
- local->transaction.len = 0;
- } else {
- local->transaction.start = offset;
- local->transaction.len = iov_length (vector, count);
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
+/* {{{ setxattr */
-/* {{{ truncate */
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (setxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
- if (main_frame) {
- local->cont.truncate.buf.st_ino = local->cont.truncate.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.truncate.buf);
- }
- return 0;
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc,
+ local->cont.setxattr.dict,
+ local->cont.setxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- local = frame->local;
- priv = this->private;
+int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ local->transaction.unwind (frame, this);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ AFR_STACK_DESTROY (frame);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ VALIDATE_OR_GOTO (this, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- local = frame->local;
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ priv = this->private;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ QUORUM_CHECK(setxattr,out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->call_count = call_count;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+ local->transaction.fop = afr_setxattr_wind;
+ local->transaction.done = afr_setxattr_done;
+ local->transaction.unwind = afr_setxattr_unwind;
-int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ loc_copy (&local->loc, loc);
- local = frame->local;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->transaction.unwind (frame, this);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
- return 0;
+ return 0;
}
+/* {{{ fsetxattr */
+
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ LOCK (&frame->lock);
+ {
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ call_count = afr_frame_return (frame);
- transaction_frame->local = local;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- local->op_ret = -1;
+ return 0;
+}
- local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+int
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetxattr,
+ local->fd,
+ local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- loc_copy (&local->loc, loc);
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+int
+afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ local->transaction.unwind (frame, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
-/* }}} */
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
-/* {{{ ftruncate */
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
-int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ QUORUM_CHECK(fsetxattr,out);
- if (main_frame) {
- local->cont.ftruncate.buf.st_ino = local->cont.ftruncate.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.ftruncate.buf);
- }
- return 0;
-}
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
-int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
+ transaction_frame->local = local;
- local = frame->local;
- priv = this->private;
+ local->op_ret = -1;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->transaction.fop = afr_fsetxattr_wind;
+ local->transaction.done = afr_fsetxattr_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->fd = fd_ref (fd);
- call_count = afr_frame_return (frame);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
}
+/* }}} */
-int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
- local = frame->local;
- priv = this->private;
+/* {{{ removexattr */
- call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+int
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (removexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
+int
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+
+int32_t
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc,
+ local->cont.removexattr.name,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_removexattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
+ afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (this, out);
- int op_ret = -1;
- int op_errno = 0;
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ priv = this->private;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ QUORUM_CHECK(removexattr,out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- local->op = GF_FOP_FTRUNCATE;
- local->op_ret = -1;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
+ local->cont.removexattr.name = gf_strdup (name);
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
+ local->transaction.fop = afr_removexattr_wind;
+ local->transaction.done = afr_removexattr_done;
+ local->transaction.unwind = afr_removexattr_unwind;
- local->fd = fd_ref (fd);
+ loc_copy (&local->loc, loc);
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ utimens */
-
-
+/* ffremovexattr */
int
-afr_utimens_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- local->cont.utimens.buf.st_ino = local->cont.utimens.ino;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- &local->cont.utimens.buf);
- }
- return 0;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fremovexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
}
int
-afr_utimens_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 1;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.utimens.buf = *buf;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int
-afr_utimens_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fremovexattr,
+ local->fd,
+ local->cont.removexattr.name,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+int
+afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- local->call_count = call_count;
+ local->transaction.unwind (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_utimens_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc,
- local->cont.utimens.tv);
+ AFR_STACK_DESTROY (frame);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
-afr_utimens_done (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = 0;
- local = frame->local;
+ VALIDATE_OR_GOTO (this, out);
- local->transaction.unwind (frame, this);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- AFR_STACK_DESTROY (frame);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- return 0;
-}
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
-int
-afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2])
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ QUORUM_CHECK(fremovexattr, out);
- int ret = -1;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- int op_ret = -1;
- int op_errno = 0;
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- priv = this->private;
+ transaction_frame->local = local;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ local->op_ret = -1;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->cont.removexattr.name = gf_strdup (name);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->transaction.fop = afr_fremovexattr_wind;
+ local->transaction.done = afr_fremovexattr_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
- transaction_frame->local = local;
+ return 0;
+}
- local->op_ret = -1;
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
- local->cont.utimens.tv[0] = tv[0];
- local->cont.utimens.tv[1] = tv[1];
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
- local->cont.utimens.ino = loc->inode->ino;
+ local = frame->local;
+ priv = this->private;
- local->transaction.fop = afr_utimens_wind;
- local->transaction.done = afr_utimens_done;
- local->transaction.unwind = afr_utimens_unwind;
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- return 0;
-}
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
-/* }}} */
+ call_count = afr_frame_return (frame);
-/* {{{ setxattr */
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+ return 0;
+}
-int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fallocate,
+ local->fd,
+ local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno)
- }
- return 0;
-}
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
-int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ return 0;
+}
+
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ transaction_frame->local = local;
+ frame->local = NULL;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local->op = GF_FOP_FALLOCATE;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->transaction.fop = afr_fallocate_wind;
+ local->transaction.done = afr_fallocate_done;
+ local->transaction.unwind = afr_fallocate_unwind;
- call_count = afr_frame_return (frame);
+ local->transaction.main_frame = frame;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
+ /* fallocate can modify the file size */
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int call_count = -1;
- int i = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(fallocate,out);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->call_count = call_count;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
-int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ afr_do_fallocate (frame, this);
- local->transaction.unwind (frame, this);
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return 0;
}
+/* }}} */
-int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
+/* {{{ discard */
+
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
- int ret = -1;
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
- int op_ret = -1;
- int op_errno = 0;
+ local = frame->local;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- priv = this->private;
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- transaction_frame->local = local;
+ call_count = afr_frame_return (frame);
- local->op_ret = -1;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ return 0;
+}
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->discard,
+ local->fd,
+ local->cont.discard.offset,
+ local->cont.discard.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- loc_copy (&local->loc, loc);
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local = frame->local;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local->transaction.unwind (frame, this);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-/* }}} */
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
-/* {{{ removexattr */
+ local = frame->local;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
-int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ transaction_frame->local = local;
+ frame->local = NULL;
- local = frame->local;
- priv = this->private;
+ local->op = GF_FOP_DISCARD;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local->transaction.fop = afr_discard_wind;
+ local->transaction.done = afr_discard_done;
+ local->transaction.unwind = afr_discard_unwind;
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno)
- }
- return 0;
-}
+ local->transaction.main_frame = frame;
+
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int call_count = -1;
- int need_unwind = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(discard, out);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- call_count = afr_frame_return (frame);
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_discard(frame, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
}
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
+/* {{{ zerofill */
+
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.zerofill.prebuf,
+ &local->cont.zerofill.postbuf,
+ NULL);
+ }
+ return 0;
+}
- int call_count = -1;
- int i = 0;
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind) {
+ local->transaction.unwind (frame, this);
+ }
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->zerofill,
+ local->fd,
+ local->cont.zerofill.offset,
+ local->cont.zerofill.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
- local->call_count = call_count;
+ local->transaction.unwind (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name);
+ AFR_STACK_DESTROY (frame);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
-
-int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- local->transaction.unwind (frame, this);
+ local = frame->local;
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+ transaction_frame->local = local;
+ frame->local = NULL;
-int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ local->op = GF_FOP_ZEROFILL;
- int ret = -1;
+ local->transaction.fop = afr_zerofill_wind;
+ local->transaction.done = afr_zerofill_done;
+ local->transaction.unwind = afr_zerofill_unwind;
- int op_ret = -1;
- int op_errno = 0;
+ local->transaction.main_frame = frame;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = 0;
- priv = this->private;
+ op_ret = afr_transaction (transaction_frame, this,
+ AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
- ALLOC_OR_GOTO (local, afr_local_t, out);
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- transaction_frame->local = local;
+ priv = this->private;
- local->op_ret = -1;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(zerofill, out);
- local->cont.removexattr.name = strdup (name);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ goto out;
+ }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_zerofill(frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
+ if (ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index 358d25b52..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,43 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
+
+int
+afr_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
+
+int
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
new file mode 100644
index 000000000..060d78f35
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -0,0 +1,2174 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dict.h"
+#include "byte-order.h"
+#include "common-utils.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+#include <signal.h>
+
+
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
+
+#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_out (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_out (frame, this, params); \
+ } while (0);
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
+
+static uint64_t afr_lock_number = 1;
+
+static uint64_t
+get_afr_lock_number ()
+{
+ return (++afr_lock_number);
+}
+
+int
+afr_set_lock_number (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->lock_number = get_afr_lock_number ();
+
+ return 0;
+}
+
+void
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
+{
+ gf_log (this->name, GF_LOG_TRACE,
+ "Setting lk-owner=%llu",
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
+}
+
+static int
+is_afr_lock_selfheal (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+
+ int_lock = &local->internal_lock;
+
+ switch (int_lock->selfheal_lk_type) {
+ case AFR_DATA_SELF_HEAL_LK:
+ case AFR_METADATA_SELF_HEAL_LK:
+ ret = 1;
+ break;
+ case AFR_ENTRY_SELF_HEAL_LK:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+
+}
+
+int32_t
+internal_lock_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
+ }
+
+ return call_count;
+}
+
+static void
+afr_print_inodelk (char *str, int size, int cmd,
+ struct gf_flock *flock, gf_lkowner_t *owner)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "<null>";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ cmd_str, type_str, (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner));
+
+}
+
+static void
+afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
+ int child_index)
+{
+ snprintf (str, size, "path=%s, fd=%p, child=%d",
+ loc->path ? loc->path : "<nul>",
+ fd ? fd : NULL,
+ child_index);
+}
+
+void
+afr_print_entrylk (char *str, int size, const char *basename,
+ gf_lkowner_t *owner)
+{
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
+ basename ? basename : "<nul>",
+ lkowner_utoa (owner));
+}
+
+static void
+afr_print_verdict (int op_ret, int op_errno, char *str)
+{
+ if (op_ret < 0) {
+ if (op_errno == EAGAIN)
+ strcpy (str, "EAGAIN");
+ else
+ strcpy (str, "FAILED");
+ }
+ else
+ strcpy (str, "GRANTED");
+}
+
+static void
+afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
+ char *lock_call_type_str,
+ afr_internal_lock_t *int_lock)
+{
+ switch (lock_call_type) {
+ case AFR_INODELK_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL");
+ break;
+ case AFR_INODELK_NB_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL");
+ break;
+ case AFR_ENTRYLK_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL");
+ break;
+ case AFR_ENTRYLK_NB_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL");
+ break;
+ default:
+ strcpy (lock_call_type_str, "UNKNOWN");
+ break;
+ }
+
+}
+
+static void
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+ int op_ret, int op_errno, int32_t child_index)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ char lockee[256];
+ char lock_call_type_str[256];
+ char verdict[16];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ afr_print_verdict (op_ret, op_errno, verdict);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
+ (unsigned long long) int_lock->lock_number);
+
+}
+
+static void
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+ int32_t cmd, int32_t child_index)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number);
+
+}
+
+static void
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int32_t cookie)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
+
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number,
+ cookie);
+}
+
+static void
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+ char verdict[16];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
+
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ afr_print_verdict (op_ret, op_errno, verdict);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
+ verdict,
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number,
+ cookie);
+
+}
+
+static int
+transaction_lk_op (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+
+ int_lock = &local->internal_lock;
+
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is for a transaction");
+ ret = 1;
+ }
+ else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is for a self heal");
+
+ ret = 0;
+ }
+
+ if (ret == -1)
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is not set");
+
+ return ret;
+
+}
+
+static int
+is_afr_lock_transaction (afr_local_t *local)
+{
+ int ret = 0;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = 1;
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ ret = 0;
+ break;
+
+ }
+
+ return ret;
+}
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
+static int
+initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->entrylk_lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
+ }
+
+ return 0;
+}
+
+static int
+initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ afr_inodelk_t *inodelk = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
+
+ return 0;
+}
+
+loc_t *
+lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+{
+ int ret = 0;
+
+ ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
+
+ if (ret == 0)
+ ret = strcmp (b1, b2);
+
+ if (ret <= 0)
+ return l1;
+ else
+ return l2;
+}
+
+int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+ int call_count = 0;
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
+
+ return call_count;
+}
+
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+
+{
+ int i = 0;
+ int call_count = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (locked_nodes[i] & LOCKED_YES)
+ call_count++;
+ }
+
+ return call_count;
+}
+
+/* FIXME: What if UNLOCK fails */
+static int32_t
+afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ }
+
+ return 0;
+}
+
+static int32_t
+afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, NULL, op_ret,
+ op_errno, child_index);
+
+ priv = this->private;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
+ }
+
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ return 0;
+
+}
+
+static int
+afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int call_count = 0;
+ int i = 0;
+ int piggyback = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
+
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
+ priv->child_count);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ goto out;
+ }
+
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
+ continue;
+
+ if (local->fd) {
+ flock_use = &flock;
+ if (!local->transaction.eager_lock[i]) {
+ goto wind;
+ }
+
+ piggyback = 0;
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_piggyback[i]) {
+ fd_ctx->lock_piggyback[i]--;
+ piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ if (piggyback) {
+ afr_unlock_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ if (!--call_count)
+ break;
+ continue;
+ }
+
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return 0;
+}
+
+static int32_t
+afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
+ }
+
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int
+afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ copies = priv->child_count;
+
+ call_count = afr_lockee_locked_nodes_count (int_lock);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count){
+ gf_log (this->name, GF_LOG_TRACE,
+ "No internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+out:
+ return 0;
+
+}
+
+static int32_t
+afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ }
+
+ local->op_errno = op_errno;
+ int_lock->lock_op_errno = op_errno;
+ }
+
+ int_lock->lk_attempted_count++;
+ }
+ UNLOCK (&frame->lock);
+
+ if ((op_ret == -1) &&
+ (op_errno == ENOSYS)) {
+ afr_unlock (frame, this);
+ } else {
+ if (op_ret == 0) {
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
+ }
+ afr_lock_blocking (frame, this, cky + 1);
+ }
+
+ return 0;
+}
+
+static int32_t
+afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long) cookie);
+
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+
+}
+
+static int32_t
+afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long)cookie);
+
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
+ break;
+ }
+
+ return 0;
+
+}
+
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
+int
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {0,};
+ uint64_t ctx = 0;
+ int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
+
+ if (local->fd) {
+ ret = fd_ctx_get (local->fd, this, &ctx);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+
+ afr_copy_locked_nodes (frame, this);
+
+ afr_unlock (frame, this);
+
+ return 0;
+ }
+ }
+
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+
+ afr_copy_locked_nodes (frame, this);
+
+ afr_unlock(frame, this);
+
+ return 0;
+ }
+ }
+
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ /* we're done locking */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "we're done locking");
+
+ afr_copy_locked_nodes (frame, this);
+
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ return 0;
+ }
+
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+
+ if (local->fd) {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLKW,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLKW,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
+ }
+
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
+ if (local->fd) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+ (void *) (long) cookie,
+ priv->children[child_index],
+ priv->children[child_index]->fops->fentrylk,
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ } else {
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP, local->transaction.basename,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+ (void *) (long) cookie,
+ priv->children[child_index],
+ priv->children[child_index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int32_t
+afr_blocking_lock (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int up_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ initialize_inodelk_variables (frame, this);
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
+ initialize_entrylk_variables (frame, this);
+ break;
+ }
+
+ afr_lock_blocking (frame, this, 0);
+
+ return 0;
+}
+
+static int32_t
+afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (long) cookie);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
+
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Last locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (int_lock->entrylk_lock_count ==
+ int_lock->lk_expected_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ }
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d servers locked. Trying again with blocking calls",
+ int_lock->lock_count);
+
+ afr_unlock(frame, this);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ copies = priv->child_count;
+ initialize_entrylk_variables (frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ return -1;
+ }
+
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking entrylk calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
+ this->name, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
+ }
+ }
+ } else {
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ }
+out:
+ return 0;
+}
+
+int32_t
+afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long) cookie);
+
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Last inode locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ }
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d servers locked. Trying again with blocking calls",
+ int_lock->lock_count);
+
+ afr_unlock(frame, this);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ full_flock.l_type = inodelk->flock.l_type;
+
+ initialize_inodelk_variables (frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ ret = -1;
+ goto out;
+ }
+
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking inodelk calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
+ goto wind;
+ }
+
+ piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_acquired[i]) {
+ fd_ctx->lock_piggyback[i]++;
+ piggyback = 1;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ if (piggyback) {
+ /* (op_ret == 1) => indicate piggybacked lock */
+ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ if (!--call_count)
+ break;
+ continue;
+ }
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, flock_use, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ } else {
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (transaction_lk_op (local)) {
+ if (is_afr_lock_transaction (local))
+ afr_unlock_inodelk (frame, this);
+ else
+ afr_unlock_entrylk (frame, this);
+
+ } else {
+ if (is_afr_lock_selfheal (local))
+ afr_unlock_inodelk (frame, this);
+ else
+ afr_unlock_entrylk (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret)
+ goto out;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to get the fd ctx");
+ goto out;
+ }
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ GF_ASSERT (fdctx->locked_on);
+
+ memcpy (fdctx->locked_on, locked_nodes,
+ priv->child_count);
+
+out:
+ return ret;
+}
+
+static int
+__is_fd_saved (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_private_t *priv = NULL;
+ int found = 0;
+
+ priv = this->private;
+
+ list_for_each_entry (locked_fd, &priv->saved_fds, list) {
+ if (locked_fd->fd == fd) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static int
+__afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
+ gf_afr_mt_locked_fd);
+ if (!locked_fd) {
+ ret = -1;
+ goto out;
+ }
+
+ locked_fd->fd = fd;
+ INIT_LIST_HEAD (&locked_fd->list);
+
+ list_add_tail (&locked_fd->list, &priv->saved_fds);
+
+out:
+ return ret;
+}
+
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->mutex);
+ {
+ if (__is_fd_saved (this, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p already saved", fd);
+ goto unlock;
+ }
+
+ ret = __afr_save_locked_fd (this, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd=%p could not be saved", fd);
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->mutex);
+
+ return ret;
+}
+
+static int
+afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+
+ local = frame->local;
+
+ locked_fd = local->locked_fd;
+
+ STACK_DESTROY (frame->root);
+ afr_local_cleanup (local, this);
+
+ afr_save_locked_fd (this, locked_fd->fd);
+
+ return 0;
+
+}
+
+static int
+afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t tmp = 0;
+ int i = 0;
+ int source_child = -1;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (fdctx->locked_on[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Found lock recovery source=%d", i);
+ source_child = i;
+ break;
+ }
+ }
+
+out:
+ return source_child;
+
+}
+
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+int32_t
+afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t source_child = 0;
+ struct gf_flock flock = {0,};
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "lock recovery failed");
+ goto cleanup;
+ }
+
+ source_child = local->source_child;
+
+ memcpy (&flock, lock, sizeof (*lock));
+
+ STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+ (void *) (long) source_child,
+ priv->children[source_child],
+ priv->children[source_child]->fops->lk,
+ local->fd, F_GETLK_FD, &flock, NULL);
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+ return 0;
+}
+
+int
+afr_recover_lock (call_frame_t *frame, xlator_t *this,
+ struct gf_flock *flock)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t lock_recovery_child = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ lock_recovery_child = local->lock_recovery_child;
+
+ frame->root->lk_owner = flock->l_owner;
+
+ STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
+ (void *) (long) lock_recovery_child,
+ priv->children[lock_recovery_child],
+ priv->children[lock_recovery_child]->fops->lk,
+ local->fd, F_SETLK, flock, NULL);
+
+ return 0;
+}
+
+static int
+is_afr_lock_eol (struct gf_flock *lock)
+{
+ int ret = 0;
+
+ if ((lock->l_type == GF_LK_EOL))
+ ret = 1;
+
+ return ret;
+}
+
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Failed to get locks on fd");
+ goto cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Got a lock on fd");
+
+ if (is_afr_lock_eol (lock)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reached EOL on locks on fd");
+ goto cleanup;
+ }
+
+ afr_recover_lock (frame, this, lock);
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+
+ return 0;
+}
+
+static int
+afr_lock_recovery (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ int32_t source_child = 0;
+ struct gf_flock flock = {0,};
+
+ priv = this->private;
+ local = frame->local;
+
+ fd = local->fd;
+
+ source_child = afr_get_source_lock_recovery (this, fd);
+ if (source_child < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not recover locks due to lock "
+ "split brain");
+ ret = -1;
+ goto out;
+ }
+
+ local->source_child = source_child;
+
+ /* the flock can be zero filled as we're querying incrementally
+ the locks held on the fd.
+ */
+ STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+ (void *) (long) source_child,
+ priv->children[source_child],
+ priv->children[source_child]->fops->lk,
+ local->fd, F_GETLK_FD, &flock, NULL);
+
+out:
+ return ret;
+}
+
+
+static int
+afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ fdctx->opened_on[child_index] = AFR_FD_OPENED;
+
+out:
+ return ret;
+}
+
+int32_t
+afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t child_index = (long )cookie;
+ int ret = 0;
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reopen during lock-recovery failed");
+ goto cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Open succeeded => proceed to recover locks");
+
+ ret = afr_lock_recovery (frame, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Lock recovery failed");
+ goto cleanup;
+ }
+
+ ret = afr_mark_fd_opened (this, fd, child_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Marking fd open failed");
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+ return 0;
+}
+
+static int
+afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ uint64_t tmp = 0;
+ afr_fd_ctx_t *fdctx = NULL;
+ loc_t loc = {0,};
+ int32_t child_index = 0;
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ GF_ASSERT (local && local->fd);
+
+ ret = fd_ctx_get (local->fd, this, &tmp);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context of fd",
+ uuid_utoa (local->fd->inode->gfid));
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+ /* TODO: instead we should return from the function */
+ GF_ASSERT (fdctx);
+
+ child_index = local->lock_recovery_child;
+
+ inode_path (local->fd->inode, NULL, (char **)&loc.path);
+ loc.name = strrchr (loc.path, '/');
+ loc.inode = inode_ref (local->fd->inode);
+ loc.parent = inode_parent (local->fd->inode, 0, NULL);
+
+
+ STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
+ (void *)(long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->open,
+ &loc, fdctx->flags, local->fd, NULL);
+
+ return 0;
+}
+
+static int
+is_fd_opened (fd_t *fd, int32_t child_index)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, THIS, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
+ ret = 1;
+
+out:
+ return ret;
+}
+
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_locked_fd_t *tmp = NULL;
+ int ret = -1;
+ struct list_head locks_list = {0,};
+ int32_t op_errno = 0;
+
+
+ priv = this->private;
+
+ if (list_empty (&priv->saved_fds))
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = local;
+
+ INIT_LIST_HEAD (&locks_list);
+
+ pthread_mutex_lock (&priv->mutex);
+ {
+ list_splice_init (&priv->saved_fds, &locks_list);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+ list_for_each_entry_safe (locked_fd, tmp,
+ &locks_list, list) {
+
+ list_del_init (&locked_fd->list);
+
+ local->fd = fd_ref (locked_fd->fd);
+ local->lock_recovery_child = child_index;
+ local->locked_fd = locked_fd;
+
+ if (!is_fd_opened (locked_fd->fd, child_index)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "attempting open before lock "
+ "recovery");
+ afr_lock_recovery_preopen (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "attempting lock recovery "
+ "without a preopen");
+ afr_lock_recovery (frame, this);
+ }
+ }
+
+out:
+ if ((ret < 0) && frame)
+ AFR_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
new file mode 100644
index 000000000..73594f265
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __AFR_MEM_TYPES_H__
+#define __AFR_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_afr_mem_types_ {
+ gf_afr_mt_iovec = gf_common_mt_end + 1,
+ gf_afr_mt_afr_fd_ctx_t,
+ gf_afr_mt_afr_private_t,
+ gf_afr_mt_int32_t,
+ gf_afr_mt_char,
+ gf_afr_mt_xattr_key,
+ gf_afr_mt_dict_t,
+ gf_afr_mt_xlator_t,
+ gf_afr_mt_iatt,
+ gf_afr_mt_int,
+ gf_afr_mt_afr_node_character,
+ gf_afr_mt_sh_diff_loop_state,
+ gf_afr_mt_uint8_t,
+ gf_afr_mt_loc_t,
+ gf_afr_mt_entry_name,
+ gf_afr_mt_pump_priv,
+ gf_afr_mt_locked_fd,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
+ gf_afr_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
new file mode 100644
index 000000000..643a5d692
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -0,0 +1,382 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+int
+afr_stale_child_up (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int up = -1;
+
+ priv = this->private;
+
+ if (!local->fresh_children)
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
+ if (priv->child_count == afr_get_children_count (local->fresh_children,
+ priv->child_count))
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ if (afr_is_child_present (local->fresh_children,
+ priv->child_count, i))
+ continue;
+ up = i;
+ break;
+ }
+out:
+ return up;
+}
+
+void
+afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ inode_t *inode = NULL;
+ int st_child = -1;
+ char reason[64] = {0};
+
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
+
+ if (!IA_ISREG (inode->ia_type))
+ goto out;
+
+ st_child = afr_stale_child_up (local, this);
+ if (st_child < 0)
+ goto out;
+
+ sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ sh->do_gfid_self_heal = _gf_true;
+ sh->do_missing_entry_self_heal = _gf_true;
+
+ snprintf (reason, sizeof (reason), "stale subvolume %d detected",
+ st_child);
+ afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
+ reason, NULL, NULL);
+out:
+ return;
+}
+
+int
+afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = frame->local;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
+ AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, xdata);
+ return 0;
+}
+
+
+int
+afr_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->success_count++;
+
+ ret = afr_child_fd_ctx_set (this, fd, child_index,
+ local->cont.open.flags);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if ((local->cont.open.flags & O_TRUNC)
+ && (local->op_ret >= 0)) {
+ STACK_WIND (frame, afr_open_ftruncate_cbk,
+ this, this->fops->ftruncate,
+ fd, 0, NULL);
+ } else {
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
+ AFR_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd, xdata);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+ int32_t wind_flags = flags & (~O_TRUNC);
+ //We can't let truncation to happen outside transaction.
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
+
+ if (afr_is_split_brain (this, loc->inode)) {
+ /* self-heal failed */
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to open as split brain seen, returning EIO");
+ op_errno = EIO;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+ loc_copy (&local->loc, loc);
+
+ local->cont.open.flags = flags;
+
+ local->fd = fd_ref (fd);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ loc, wind_flags, fd, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
+
+ return 0;
+}
+
+int
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ }
+
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get fd context, %p", local->fd);
+ goto out;
+ }
+
+ LOCK (&local->fd->lock);
+ {
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+out:
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
+
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
+ need_open_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
+
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, local->fd,
+ NULL);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void *)(long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
+ }
+
+ }
+ op_errno = 0;
+ ret = 0;
+out:
+ if (op_errno)
+ ret = -1; //For handling ALLOC_OR_GOTO
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
new file mode 100644
index 000000000..83846f152
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -0,0 +1,837 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <openssl/md5.h>
+#include "glusterfs.h"
+#include "afr.h"
+#include "xlator.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heal-algorithm.h"
+
+/*
+ This file contains the various self-heal algorithms
+*/
+
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame)
+ goto out;
+
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
+
+static void
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh_priv = sh->private;
+ GF_FREE (sh_priv);
+}
+
+static int
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
+{
+ int writes = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
+ }
+
+ return writes;
+}
+
+
+static int
+sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ if (sh_priv) {
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
+ }
+
+ sh_private_cleanup (sh_frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-heal aborting on %s",
+ local->loc.path);
+
+ local->self_heal.algo_abort_cbk (sh_frame, this);
+ } else {
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) {
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
+ }
+
+ sh->old_loop_frame = last_loop_frame;
+ local->self_heal.algo_completion_cbk (sh_frame, this);
+ }
+
+ return 0;
+}
+
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ if (!loop_frame)
+ goto out;
+
+ loop_local = loop_frame->local;
+ if (loop_local) {
+ loop_sh = &loop_local->self_heal;
+ }
+
+ if (loop_sh && loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this, this->name,
+ sh_destroy_frame);
+ } else {
+ sh_destroy_frame (loop_frame, this);
+ }
+out:
+ return 0;
+}
+
+static int
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true;
+ loop_sh->sh_data_algo_start (loop_frame, this);
+ return 0;
+}
+
+static int
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
+{
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
+ return 0;
+}
+
+static int
+sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *old_loop_frame, call_frame_t **loop_frame)
+{
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (sh_frame);
+ GF_ASSERT (loop_frame);
+
+ *loop_frame = NULL;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_owner as sh_frame
+ //so that locks translator allows conflicting locks
+ new_loop_local = afr_self_heal_local_init (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
+
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
+ goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
+ gf_afr_mt_uint8_t);
+ if (!new_loop_sh->checksum)
+ goto out;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ *loop_frame = new_loop_frame;
+ return 0;
+out:
+ sh_destroy_frame (new_loop_frame, this);
+ return -ENOMEM;
+}
+
+static int
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
+{
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ int ret = 0;
+
+ GF_ASSERT (sh_frame);
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+
+ ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
+ &new_loop_frame);
+ if (ret)
+ goto out;
+ new_loop_local = new_loop_frame->local;
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
+ return 0;
+out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
+ return 0;
+}
+
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
+{
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ gf_boolean_t is_driver_done = _gf_false;
+ blksize_t block_size = 0;
+ int loop = 0;
+ off_t offset = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+
+ LOCK (&sh_priv->lock);
+ {
+ if (!is_first_call)
+ sh_priv->loops_running--;
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
+ && (sh_priv->offset < sh->file_size)) {
+
+ loop++;
+ sh_priv->offset += block_size;
+ sh_priv->loops_running++;
+
+ if (!is_first_call)
+ break;
+ }
+ if (0 == sh_priv->loops_running) {
+ is_driver_done = _gf_true;
+ }
+ }
+ UNLOCK (&sh_priv->lock);
+
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
+ while (loop--) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ // op failed in other loop, stop spawning more loops
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
+ offset += block_size;
+ }
+ }
+
+driver_done:
+ if (is_driver_done) {
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
+ }
+ return 0;
+}
+
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
+
+ if (op_ret == -1) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
+
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
+
+ return 0;
+}
+
+static int
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ child_index = (long) cookie;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (loop_sh, op_errno);
+ } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "incomplete write to %s on subvolume %s "
+ "(expected %lu, returned %d)", sh_local->loc.path,
+ priv->children[child_index]->name,
+ loop_local->cont.writev.vector->iov_len, op_ret);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+
+ call_count = afr_frame_return (loop_frame);
+
+ if (call_count == 0) {
+ iobref_unref(loop_local->cont.writev.iobref);
+
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
+ }
+
+ return 0;
+}
+
+static void
+sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
+ afr_private_t *priv)
+{
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (!strcmp (sh->algo->name, "diff"))
+ return;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ /* full self-heal guarantees there exists atleast 1 file with size 0
+ * That means for other files we can preserve holes that come after
+ * its size before 'trim'
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->write_needed[i] &&
+ ((loop_sh->offset + 1) > sh->buf[i].ia_size))
+ loop_sh->write_needed[i] = 0;
+ }
+}
+
+static int
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "read %d bytes of data from %s, offset %"PRId64"",
+ op_ret, loop_local->loc.path, loop_sh->offset);
+
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
+ }
+
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
+ sh_prune_writes_needed (sh_frame, loop_frame, priv);
+
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ if (call_count == 0) {
+ sh_loop_return (sh_frame, this, loop_frame, 0, 0);
+ goto out;
+ }
+
+ loop_local->call_count = call_count;
+
+ /*
+ * We only really need the request size at the moment, but the buffer
+ * is required if we want to issue a retry in the event of a short write.
+ * Therefore, we duplicate the vector and ref the iobref here...
+ */
+ loop_local->cont.writev.vector = iov_dup(vector, count);
+ loop_local->cont.writev.iobref = iobref_ref(iobref);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, 0, iobref, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+out:
+ return 0;
+}
+
+
+static int
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset, 0, NULL);
+
+ return 0;
+}
+
+
+static int
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+ int write_needed = 0;
+
+ priv = this->private;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ sh_priv = sh->private;
+
+ child_index = (long) cookie;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "checksum on %s failed on subvolume %s (%s)",
+ sh_local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
+ strong_checksum, MD5_DIGEST_LENGTH);
+ }
+
+ call_count = afr_frame_return (loop_frame);
+
+ if (call_count == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] || !sh_local->child_up[i])
+ continue;
+
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
+ MD5_DIGEST_LENGTH)) {
+ /*
+ Checksums differ, so this block
+ must be written to this sink
+ */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "checksum on subvolume %s at offset %"
+ PRId64" differs from that on source",
+ priv->children[i]->name, loop_sh->offset);
+
+ write_needed = loop_sh->write_needed[i] = 1;
+ }
+ }
+
+ LOCK (&sh_priv->lock);
+ {
+ sh_priv->total_blocks++;
+ if (write_needed)
+ sh_priv->diff_blocks++;
+ }
+ UNLOCK (&sh_priv->lock);
+
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh_loop_read (loop_frame, this);
+ } else {
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
+ }
+ }
+
+ return 0;
+}
+
+static int
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
+
+ loop_local->call_count = call_count;
+
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
+
+static int
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
+ }
+ sh_loop_read (loop_frame, this);
+ return 0;
+}
+
+afr_sh_algo_private_t*
+afr_sh_priv_init ()
+{
+ afr_sh_algo_private_t *sh_priv = NULL;
+
+ sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
+ gf_afr_mt_afr_private_t);
+ if (!sh_priv)
+ goto out;
+
+ LOCK_INIT (&sh_priv->lock);
+out:
+ return sh_priv;
+}
+
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_self_heal_t *dst_sh = NULL;
+ afr_local_t *src_local = NULL;
+ afr_self_heal_t *src_sh = NULL;
+ int ret = -1;
+
+ dst_local = dst->local;
+ dst_sh = &dst_local->self_heal;
+ src_local = src->local;
+ src_sh = &src_local->self_heal;
+ GF_ASSERT (src_sh->data_lock_held);
+ GF_ASSERT (!dst_sh->data_lock_held);
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret)
+ return ret;
+ src_sh->data_lock_held = _gf_false;
+ dst_sh->data_lock_held = _gf_true;
+ return 0;
+}
+
+int
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
+{
+ call_frame_t *first_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ sh->sh_data_algo_start = sh_data_algo_start;
+ local->call_count = 0;
+ ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
+ if (ret)
+ goto out;
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
+ sh->private = afr_sh_priv_init ();
+ if (!sh->private) {
+ ret = -1;
+ goto out;
+ }
+ sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
+ ret = 0;
+out:
+ if (ret) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ sh_loop_driver_done (sh_frame, this, NULL);
+ }
+ return 0;
+}
+
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
+ return 0;
+}
+
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
+ return 0;
+}
+
+struct afr_sh_algorithm afr_self_heal_algorithms[] = {
+ {.name = "full", .fn = afr_sh_algo_full},
+ {.name = "diff", .fn = afr_sh_algo_diff},
+ {0, 0},
+};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
new file mode 100644
index 000000000..6b20789b1
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
+#define __AFR_SELF_HEAL_ALGORITHM_H__
+
+typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
+ xlator_t *this);
+
+struct afr_sh_algorithm {
+ const char *name;
+ afr_sh_algo_fn fn;
+};
+
+extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
+typedef struct {
+ gf_lock_t lock;
+ unsigned int loops_running;
+ off_t offset;
+
+ int32_t total_blocks;
+ int32_t diff_blocks;
+} afr_sh_algo_private_t;
+
+#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5f08fb007..ef92b4205 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "glusterfs.h"
@@ -25,7 +16,68 @@
#include "afr-transaction.h"
#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
+#include "pump.h"
+
+#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_SYNC_BEGIN == status || \
+ AFR_SELF_HEAL_FAILED == status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ memset (sh->parentbufs, 0,
+ sizeof (*sh->parentbufs) * priv->child_count);
+ memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
+ memset (sh->locked_nodes, 0,
+ sizeof (*sh->locked_nodes) * priv->child_count);
+ sh->active_sinks = 0;
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+}
+//Intersection[child]=1 if child is part of intersection
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count)
+{
+ int i = 0;
+
+ memset (intersection, 0, sizeof (*intersection) * child_count);
+ for (i = 0; i < child_count; i++) {
+ intersection[i] = afr_is_child_present (set1, child_count, i)
+ && afr_is_child_present (set2, child_count,
+ i);
+ }
+}
/**
* select_source - select a source and return it
@@ -34,115 +86,215 @@
int
afr_sh_select_source (int sources[], int child_count)
{
- int i;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ int i = 0;
+ for (i = 0; i < child_count; i++)
+ if (sources[i])
+ return i;
- return -1;
+ return -1;
}
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int active_sinks = 0;
-/**
- * sink_count - return number of sinks in sources array
- */
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
-int
-afr_sh_sink_count (int sources[], int child_count)
-{
- int i;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] == 0 && local->child_up[i] == 1) {
+ active_sinks++;
+ sh->success[i] = 1;
+ } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
+ sh->success[i] = 1;
+ }
+ }
+ sh->active_sinks = active_sinks;
}
int
afr_sh_source_count (int sources[], int child_count)
{
- int i;
- int nsource = 0;
+ int i = 0;
+ int nsource = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
+ for (i = 0; i < child_count; i++)
+ if (sources[i])
+ nsource++;
+ return nsource;
}
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
+{
+ sh->op_ret = -1;
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
+}
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count)
+void
+afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
{
- int i = 0;
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
- for (i = 0; i < child_count; i++) {
- if (child_errno[i] && sources[i]) {
- sources[i] = 0;
- }
- }
+ /* 10 digits per entry + 1 space + '[' and ']' */
+ buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ ptr = buf;
+ ptr += sprintf (ptr, "[ ");
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ sprintf (ptr, "]");
+ gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
+ }
+
+ GF_FREE (buf);
}
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
+ int child_count = priv->child_count;
+ char *matrix_begin = "[ [ ";
+ char *matrix_end = "] ]";
+ char *seperator = "] [ ";
+ int pending_entry_strlen = 12; //Including space after entry
+ int matrix_begin_strlen = 0;
+ int matrix_end_strlen = 0;
+ int seperator_strlen = 0;
+ int string_length = 0;
+ char *msg = "- Pending matrix: ";
+
+ /*
+ * for a list of lists of [ [ a b ] [ c d ] ]
+ * */
+
+ matrix_begin_strlen = strlen (matrix_begin);
+ matrix_end_strlen = strlen (matrix_end);
+ seperator_strlen = strlen (seperator);
+ string_length = matrix_begin_strlen + matrix_end_strlen
+ + (child_count -1) * seperator_strlen
+ + (child_count * child_count * pending_entry_strlen);
+
+ buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char);
+ if (!buf)
+ goto out;
+
+ ptr = buf;
+ ptr += sprintf (ptr, "%s", msg);
+ ptr += sprintf (ptr, "%s", matrix_begin);
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ if (i < priv->child_count -1)
+ ptr += sprintf (ptr, "%s", seperator);
+ }
+
+ ptr += sprintf (ptr, "%s", matrix_end);
+
+out:
+ return buf;
+}
void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc)
{
- afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *free_ptr = NULL;
- char *buf = NULL;
- char *ptr = NULL;
+ buf = afr_get_pending_matrix_str (pending_matrix, this);
+ if (buf)
+ free_ptr = buf;
+ else
+ buf = "";
- int i, j;
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = MALLOC (priv->child_count * 11 + 8);
+ gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
+ " (possible split-brain). Please delete the file from all but "
+ "the preferred subvolume.%s", loc, buf);
+ GF_FREE (free_ptr);
+ return;
+}
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- ptr += sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
- "pending_matrix: %s", buf);
- }
- FREE (buf);
-}
+void
+afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+{
+ int i = 0;
+ int j = 0;
+ GF_ASSERT (pending_matrix);
+
+ for (i = 0; i < child_count; i++) {
+ for (j = 0; j < child_count; j++) {
+ pending_matrix[i][j] = 0;
+ }
+ }
+}
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ size_t child_count)
{
- int i, j, k;
+ int i = 0;
+ int j = 0;
- int32_t *pending = NULL;
- int ret = -1;
+ GF_ASSERT (pending_matrix);
+ GF_ASSERT (ignorant_subvols);
- unsigned char *ignorant_subvols = NULL;
+ for (i = 0; i < child_count; i++) {
+ if (ignorant_subvols[i]) {
+ for (j = 0; j < child_count; j++) {
+ if (!ignorant_subvols[j])
+ pending_matrix[j][i] += 1;
+ }
+ }
+ }
+}
- ignorant_subvols = CALLOC (sizeof (*ignorant_subvols), child_count);
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count)
+{
+ /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
+ int32_t pending[3] = {0,};
+ void *pending_raw = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
+ afr_init_pending_matrix (pending_matrix, child_count);
- for (i = 0; i < child_count; i++) {
- pending = NULL;
+ for (i = 0; i < child_count; i++) {
+ pending_raw = NULL;
for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- VOID(&pending));
-
+ ret = dict_get_ptr (xattr[i], pending_key[j],
+ &pending_raw);
+
if (ret != 0) {
/*
* There is no xattr present. This means this
@@ -150,62 +302,26 @@ afr_sh_build_pending_matrix (afr_private_t *priv,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
+ memcpy (pending, pending_raw, sizeof(pending));
k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
- /*
- * Make all non-ignorant subvols point towards the ignorant
- * subvolumes.
- */
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
+ pending_matrix[i][j] = ntoh32 (pending[k]);
}
}
- FREE (ignorant_subvols);
+ return ret;
}
-
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
- AFR_NODE_WISE
+ AFR_NODE_WISE,
} afr_node_type;
typedef struct {
@@ -246,7 +362,7 @@ afr_sh_is_wise (int32_t *array, int i, int child_count)
static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
+afr_sh_all_nodes_innocent (afr_node_character *characters,
int child_count)
{
int i = 0;
@@ -282,10 +398,10 @@ afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
/*
* The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
+ * It is 1 if no other wise node accuses it.
* Only wise nodes with wisdom 1 are sources.
*
- * If no nodes with wisdom 1 exist, a split-brain has occured.
+ * If no nodes with wisdom 1 exist, a split-brain has occurred.
*/
static void
@@ -302,7 +418,7 @@ afr_sh_compute_wisdom (int32_t *pending_matrix[],
for (j = 0; j < child_count; j++) {
if ((characters[j].type == AFR_NODE_WISE)
&& pending_matrix[j][i]) {
-
+
characters[i].wisdom = 0;
}
}
@@ -312,7 +428,7 @@ afr_sh_compute_wisdom (int32_t *pending_matrix[],
static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
+afr_sh_wise_nodes_conflict (afr_node_character *characters,
int child_count)
{
int i = 0;
@@ -333,13 +449,12 @@ afr_sh_wise_nodes_conflict (afr_node_character *characters,
static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
+afr_sh_mark_wisest_as_sources (int sources[],
+ afr_node_character *characters,
int child_count)
{
int nsources = 0;
-
- int i = 0;
+ int i = 0;
for (i = 0; i < child_count; i++) {
if (characters[i].wisdom == 1) {
@@ -351,1018 +466,2347 @@ afr_sh_mark_wisest_as_sources (int sources[],
return nsources;
}
-
-static int
-afr_sh_mark_if_size_differs (afr_self_heal_t *sh, int child_count)
+static void
+afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t child_count)
{
- int32_t ** pending_matrix;
- int i, j;
-
- int size_differs = 0;
+ int i = 0;
+ int j = 0;
+ int witness = 0;
- pending_matrix = sh->pending_matrix;
+ GF_ASSERT (witnesses);
+ GF_ASSERT (pending_matrix);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
for (i = 0; i < child_count; i++) {
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
+
+ witness = 0;
for (j = 0; j < child_count; j++) {
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[j])
- && (pending_matrix[i][j] == 0)
- && (pending_matrix[j][i] == 0)) {
-
- pending_matrix[i][j] = 1;
- pending_matrix[j][i] = 1;
-
- size_differs = 1;
- }
+ if (i == j)
+ continue;
+ witness += pending_matrix[i][j];
}
+ witnesses[i] = witness;
}
-
- return size_differs;
}
-
-static int
-afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh,
- afr_node_character *characters,
- int child_count)
+static int32_t
+afr_find_biggest_witness_among_fools (int32_t *witnesses,
+ afr_node_character *characters,
+ int32_t child_count)
{
- int i = 0;
- int biggest = 0;
-
+ int i = 0;
+ int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
+
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
+
for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_FOOL) {
- biggest = i;
- break;
- }
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
+
+ if (biggest_witness < witnesses[i]) {
+ biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
+ if (biggest_witness_cnt != 1)
+ return -1;
+
+ return biggest_witness_idx;
+}
+
+int
+afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
+ afr_node_character *characters,
+ int32_t child_count, int32_t witness)
+{
+ int i = 0;
+ int nsources = 0;
+
+ GF_ASSERT (sources);
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
+
for (i = 0; i < child_count; i++) {
if (characters[i].type != AFR_NODE_FOOL)
continue;
-
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+
+ if (witness == witnesses[i]) {
+ sources[i] = 1;
+ nsources++;
}
}
+ return nsources;
+}
- sh->sources[biggest] = 1;
- return 1;
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+ if (idx >= 0 && idx < child_count) {
+ sources[idx] = 1;
+ return 1;
+ }
+ return 0;
}
static int
-afr_sh_mark_biggest_as_source (afr_self_heal_t *sh, int child_count)
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+ int child_count)
{
- int biggest = 0;
- int i;
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_size = 0;
+ uint64_t min_size = 0;
+ int num_children = 0;
- for (i = 0; i < child_count; i++) {
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_size > max_size) {
+ max_size = bufs[child].ia_size;
+ idx = child;
+ }
+
+ if ((num_children == 0) || (bufs[child].ia_size < min_size)) {
+ min_size = bufs[child].ia_size;
}
+
+ num_children++;
+ }
+
+ /* If sizes are same for all of them, finding sources will have to
+ * happen with pending changelog. So return -1
+ */
+ if ((num_children > 1) && (min_size == max_size))
+ return -1;
+ return idx;
+}
+
+
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_ctime = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_ctime > max_ctime) {
+ max_ctime = bufs[child].ia_ctime;
+ idx = child;
+ }
+ }
+
+ return idx;
+}
+
+
+static int
+afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
+{
+ int32_t biggest_witness = 0;
+ int nsources = 0;
+ int32_t *witnesses = NULL;
+
+ GF_ASSERT (child_count > 0);
+
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
+ gf_afr_mt_int32_t);
+ if (NULL == witnesses) {
+ nsources = -1;
+ goto out;
}
- sh->sources[biggest] = 1;
+ afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
+ child_count);
+ biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
+ characters,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
- return 1;
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
+out:
+ GF_FREE (witnesses);
+ return nsources;
}
+int
+afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
+{
+ int i = 0;
+ int nsources = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (-1 == success_children[i])
+ break;
+
+ child = success_children[i];
+ if (uid == bufs[child].ia_uid) {
+ sources[child] = 1;
+ nsources++;
+ }
+ }
+ return nsources;
+}
int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type)
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
{
- int i = 0;
+ int i = 0;
+ int smallest = -1;
+ int child = 0;
- int32_t ** pending_matrix;
- int * sources;
+ for (i = 0; i < child_count; i++) {
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
+ if ((smallest == -1) ||
+ (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
+ smallest = child;
+ }
+ }
+ return smallest;
+}
- int size_differs = 0;
+static int
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int smallest = 0;
+
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
+ child_count);
+ if (smallest < 0) {
+ nsources = -1;
+ goto out;
+ }
+ nsources = afr_mark_child_as_source_by_uid (sources, bufs,
+ success_children, child_count,
+ bufs[smallest].ia_uid);
+out:
+ return nsources;
+}
- pending_matrix = sh->pending_matrix;
- sources = sh->sources;
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int child = -1;
+ int read_child = -1;
- int nsources = 0;
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (read_child < 0)
+ read_child = child;
+ else if (bufs[read_child].ia_size < bufs[child].ia_size)
+ read_child = child;
+ }
+ return read_child;
+}
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *
- characters = CALLOC (sizeof (afr_node_character),
- child_count);
+int
+afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int i = 0;
+ int child = 0;
+ gf_boolean_t sink_exists = _gf_false;
+ gf_boolean_t source_exists = _gf_false;
+ int source = -1;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
for (i = 0; i < child_count; i++) {
- if (afr_sh_is_innocent (pending_matrix[i], child_count)) {
- characters[i].type = AFR_NODE_INNOCENT;
-
- } else if (afr_sh_is_fool (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_FOOL;
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (!bufs[child].ia_size) {
+ sink_exists = _gf_true;
+ continue;
+ }
+ if (!source_exists) {
+ source_exists = _gf_true;
+ source = child;
+ continue;
+ }
+ if (bufs[source].ia_size != bufs[child].ia_size) {
+ nsources = -1;
+ goto out;
+ }
+ }
+ if (!source_exists && !sink_exists) {
+ nsources = -1;
+ goto out;
+ }
- } else if (afr_sh_is_wise (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_WISE;
+ if (!source_exists || !sink_exists)
+ goto out;
- } else {
- gf_log ("[module:replicate]", GF_LOG_ERROR,
- "Could not determine the state of subvolume %d!"
- " (This message should never appear."
- " Please file a bug report to "
- "<gluster-devel@nongnu.org>.)", i);
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (bufs[child].ia_size) {
+ sources[child] = 1;
+ nsources++;
}
}
+out:
+ return nsources;
+}
+
+char *
+afr_get_character_str (afr_node_type type)
+{
+ char *character = NULL;
+
+ switch (type) {
+ case AFR_NODE_INNOCENT:
+ character = "innocent";
+ break;
+ case AFR_NODE_FOOL:
+ character = "fool";
+ break;
+ case AFR_NODE_WISE:
+ character = "wise";
+ break;
+ default:
+ character = "invalid";
+ break;
+ }
+ return character;
+}
+
+afr_node_type
+afr_find_child_character_type (int32_t *pending_row, int32_t child,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+
+ if (afr_sh_is_innocent (pending_row, child_count))
+ type = AFR_NODE_INNOCENT;
+ else if (afr_sh_is_fool (pending_row, child, child_count))
+ type = AFR_NODE_FOOL;
+ else if (afr_sh_is_wise (pending_row, child, child_count))
+ type = AFR_NODE_WISE;
+ return type;
+}
- if (type == AFR_SELF_HEAL_DATA) {
- size_differs = afr_sh_mark_if_size_differs (sh, child_count);
+int
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ if (afr_get_children_count (success_children, priv->child_count) == 0)
+ goto out;
+
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
+ }
+
+ afr_build_pending_matrix (priv->pending_key, pending_matrix,
+ ignorant_subvols, xattr, type,
+ priv->child_count);
+
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
+ sh_type = afr_self_heal_type_for_transaction (type);
+ if (AFR_SELF_HEAL_INVALID == sh_type)
+ goto out;
+
+ afr_sh_print_pending_matrix (pending_matrix, this);
+
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
+out:
+ GF_FREE (ignorant_subvols);
+ return nsources;
+}
+
+void
+afr_find_character_types (afr_node_character *characters,
+ int32_t **pending_matrix, int32_t *success_children,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+ int child = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ type = afr_find_child_character_type (pending_matrix[child],
+ child, child_count);
+ characters[child].type = type;
+ }
+}
+
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+ unsigned int child_count)
+{
+ int i = 0;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ sources[success_children[i]] = 1;
}
+}
+/**
+ * mark_sources: Mark all 'source' nodes and return number of source
+ * nodes found
+ *
+ * A node (a row in the pending matrix) belongs to one of
+ * three categories:
+ *
+ * M is the pending matrix.
+ *
+ * 'innocent' - M[i] is all zeroes
+ * 'fool' - M[i] has i'th element = 1 (self-reference)
+ * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
+ *
+ * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
+ * needed.
+ *
+ * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
+ * a split-brain. If one wise node refers to the other but the other doesn't
+ * refer back, the referrer is a source.
+ *
+ * All fools are sinks, unless there are no 'wise' nodes. In that case,
+ * one of the fools is made a source.
+ */
+int
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
+{
+ /* stores the 'characters' (innocent, fool, wise) of the nodes */
+ afr_node_character *characters = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ child_count = priv->child_count;
+ characters = GF_CALLOC (sizeof (afr_node_character),
+ child_count, gf_afr_mt_afr_node_character);
+ if (!characters)
+ goto out;
+
+ this = THIS;
+
+ /* start clean */
+ memset (sources, 0, sizeof (*sources) * child_count);
+ nsources = 0;
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
if (afr_sh_all_nodes_innocent (characters, child_count)) {
- if (size_differs) {
- nsources = afr_sh_mark_biggest_as_source (sh,
- child_count);
+ switch (type) {
+ case AFR_SELF_HEAL_METADATA:
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
+ child_count,
+ sources);
+ break;
+ case AFR_SELF_HEAL_DATA:
+ nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
+ success_children,
+ child_count,
+ sources);
+ if ((nsources < 0) && subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
+ break;
+ default:
+ break;
}
+ goto out;
+ }
- } else if (afr_sh_wise_nodes_exist (characters, child_count)) {
+ if (afr_sh_wise_nodes_exist (characters, child_count)) {
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
-
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
- goto out;
-
} else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
+ nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
- nsources = afr_sh_mark_biggest_fool_as_source (sh, characters,
- child_count);
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
+ nsources = afr_mark_biggest_of_fools_as_source (sources,
+ pending_matrix,
+ characters,
+ success_children,
+ child_count, bufs);
}
out:
- FREE (characters);
+ if (nsources == 0)
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
+ GF_FREE (characters);
- return nsources;
+ gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
+ return nsources;
}
-
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
-
- int32_t * pending = NULL;
- int ret = 0;
-
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
-
- for (i = 0; i < child_count; i++) {
- pending = NULL;
+ int tgt = 0;
+ int src = 0;
+ int value = 0;
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- VOID(&pending));
-
- if (!success[j])
- continue;
+ afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
+ xattr, type, priv->child_count);
- k = afr_index_for_transaction_type (type);
-
- if (pending) {
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
+ /*
+ * The algorithm here has two parts. First, for each subvol indexed
+ * as tgt, we try to figure out what count everyone should have for it.
+ * If the self-heal succeeded, that's easy; the value is zero.
+ * Otherwise, the value is the maximum of the succeeding nodes' counts.
+ * Once we know the value, we loop through (possibly for a second time)
+ * setting each count to the difference so that when we're done all
+ * succeeding nodes will have the same count for tgt.
+ */
+ for (tgt = 0; tgt < priv->child_count; ++tgt) {
+ value = 0;
+ if (!success[tgt]) {
+ /* Find the maximum. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (!success[src]) {
+ continue;
+ }
+ if (delta_matrix[src][tgt] > value) {
+ value = delta_matrix[src][tgt];
+ }
}
-
}
- }
+ /* Force everyone who succeeded to the chosen value. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (success[src]) {
+ delta_matrix[src][tgt] = value
+ - delta_matrix[src][tgt];
+ }
+ else {
+ delta_matrix[src][tgt] = 0;
+ }
+ }
+ }
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+ int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
-
- int ret = 0;
-
- int32_t *pending = 0;
-
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ for (i = 0; i < child_count; i++) {
+ if (!xattr[i])
+ continue;
- for (j = 0; j < child_count; j++) {
- pending = CALLOC (sizeof (int32_t), 3);
+ local_pending = NULL;
+ for (j = 0; j < child_count; j++) {
+ pending = GF_CALLOC (sizeof (int32_t), 3,
+ gf_afr_mt_int32_t);
+
+ if (!pending) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate pending entry "
+ "for %s[%d] on %s",
+ priv->pending_key[j], type,
+ priv->children[i]->name);
+ continue;
+ }
/* 3 = data+metadata+entry */
k = afr_index_for_transaction_type (type);
- pending[k] = hton32 (delta_matrix[i][j]);
+ pending[k] = hton32 (delta_matrix[i][j]);
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
+ ret = dict_set_bin (xattr[i], priv->pending_key[j],
pending,
- 3 * sizeof (int32_t));
- }
- }
-
- return 0;
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (pending);
+ }
+ }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
+ }
+ }
+ return 0;
}
int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ local = frame->local;
+ sh = &local->self_heal;
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+ afr_sh_reset (frame, this);
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (local->unhealable) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain found, aborting selfheal of %s",
+ local->loc.path);
+ }
- if (pending[j])
- return 1;
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh->completion_cbk (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to metadata check on %s",
+ local->loc.path);
+ afr_self_heal_metadata (frame, this);
}
- return 0;
+ return 0;
}
-int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)
+static int
+afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- priv = this->private;
+ int_lock->lock_cbk = afr_sh_missing_entries_done;
+ afr_unlock (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ return 0;
+}
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+int
+afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
+{
+ int ret = -ENOMEM;
+ sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
+ gf_afr_mt_iatt);
+ if (!sh->buf)
+ goto out;
+ sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
+ gf_afr_mt_iatt);
+ if (!sh->parentbufs)
+ goto out;
+ sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
+ gf_afr_mt_int);
+ if (!sh->child_errno)
+ goto out;
+ sh->success_children = afr_children_create (child_count);
+ if (!sh->success_children)
+ goto out;
+ sh->fresh_children = afr_children_create (child_count);
+ if (!sh->fresh_children)
+ goto out;
+ sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
+ gf_afr_mt_dict_t);
+ if (!sh->xattr)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc)
+{
+ int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ sh->buf[child_index] = *buf;
+ sh->parentbufs[child_index] = *postparent;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ sh->xattr[child_index] = dict_ref (xattr);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
+ " %s => -1 (%s)", loc->path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ local->self_heal.child_errno[child_index] = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+ return;
+}
- if (pending[j])
- return 1;
+gf_boolean_t
+afr_valid_ia_type (ia_type_t ia_type)
+{
+ switch (ia_type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ case IA_IFDIR:
+ return _gf_true;
+ default:
+ return _gf_false;
}
+ return _gf_false;
+}
- return 0;
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ call_frame_t *new_frame = NULL;
+
+ op_errno = ENOMEM;
+ priv = this->private;
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
+
+ local = frame->local;
+ new_frame->local = impunge_local;
+ impunge_sh = &impunge_local->self_heal;
+ impunge_sh->sh_frame = frame;
+ impunge_sh->active_source = active_source;
+ impunge_local->child_up = memdup (local->child_up,
+ sizeof (*local->child_up) *
+ priv->child_count);
+ if (!impunge_local->child_up)
+ goto out;
+
+ impunge_local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!impunge_local->pending)
+ goto out;
+
+ ret = afr_sh_common_create (impunge_sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
+ op_errno = 0;
+ *impunge_frame = new_frame;
+out:
+ if (op_errno && new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ return -op_errno;
}
+void
+afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
+{
+ call_frame_t *impunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int ret = 0;
+ unsigned int enoent_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (!enoent_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
+ goto out;
+ }
+ sh->impunge_done = impunge_done;
+ ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
+ if (ret)
+ goto out;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc_copy (&impunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&impunge_sh->parent_loc,
+ &impunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ impunge_local->call_count = enoent_count;
+ impunge_sh->entrybuf = sh->buf[sh->source];
+ impunge_sh->parentbuf = sh->parentbufs[sh->source];
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (sh->child_errno[i] != ENOENT) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_entry_impunge_create (impunge_frame, this, i);
+ enoent_count--;
+ }
+ GF_ASSERT (!enoent_count);
+ return;
+out:
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
+ "reason: %s", local->loc.path, strerror (-ret));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ afr_sh_missing_entries_finish (frame, this);
+}
int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
-
- int ret = -1;
- int i = 0;
- int j = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return 0;
+}
- priv = this->private;
+static int
+sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int type = 0;
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ buf = &sh->buf[sh->source];
+ postparent = &sh->parentbufs[sh->source];
+
+ type = buf->ia_type;
+ if (!afr_valid_ia_type (type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unknown file type: 0%o", local->loc.path, type);
+ afr_set_local_for_unhealable (local);
+ afr_sh_missing_entries_finish (frame, this);
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ afr_sh_missing_entry_call_impunge_recreate (frame, this,
+ buf, postparent,
+ afr_sh_create_entry_cbk);
+out:
+ return 0;
+}
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+void
+afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ int32_t nsources = 0;
+ loc_t *loc = NULL;
+ int32_t subvol_status = 0;
+ afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
+ gf_boolean_t split_brain = _gf_false;
+ int read_child = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+ loc = &local->loc;
+
+ if (op_ret < 0) {
+ if (op_errno == EIO) {
+ afr_set_local_for_unhealable (local);
+ }
+ // EIO can happen if finding the fresh parent dir failed
+ goto out;
+ }
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ //now No chance for the ia_type to conflict
+ ia_type = sh->buf[sh->success_children[0]].ia_type;
+ txn_type = afr_transaction_type_get (ia_type);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (nsources < 0) {
+ gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ if (subvol_status & SPLIT_BRAIN) {
+ split_brain = _gf_true;
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ nsources = 1;
+ sh->sources[sh->success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child
+ (this,
+ sh->success_children,
+ sh->buf);
+ sh->sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ op_errno = EIO;
+ goto out;
+ }
+ } else {
+ op_errno = EIO;
+ goto out;
+ }
+ }
- if (pending[j])
- return 1;
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ sh->source = sh->fresh_children[0];
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
}
- return 0;
+ if (sh->gfid_sh_success_cbk)
+ sh->gfid_sh_success_cbk (frame, this);
+ sh->type = sh->buf[sh->source].ia_type;
+ if (uuid_is_null (loc->inode->gfid))
+ uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
+ if (split_brain) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ sh_missing_entries_create (frame, this);
+ }
+ return;
+out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
+static int
+afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent, &sh->lookup_loc);
+ call_count = afr_frame_return (frame);
+
+ if (call_count)
+ goto out;
+ op_ret = -1;
+ if (!sh->success_count) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
+ "reason %s", sh->lookup_loc.path,
+ strerror (op_errno));
+ goto done;
+ }
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
+ (afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count,
+ sh->lookup_loc.path, this->name))) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
-int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
-{
- int i, j;
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
+ (afr_gfid_missing_count (this->name, sh->success_children,
+ sh->buf, priv->child_count,
+ sh->lookup_loc.path))) {
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
+ op_ret = 0;
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
+done:
+ sh->lookup_done (frame, this, op_ret, op_errno);
+out:
+ return 0;
}
-
int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+ int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->post_remove_call);
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "purge entry %s failed, on child %d reason, %s",
+ local->loc.path, child, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_sh_set_error (sh, EIO);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ sh->post_remove_call (frame, this);
+ return 0;
+}
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_TRACE,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
+void
+afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ struct iatt *parentbuf,
+ afr_expunge_done_cbk_t expunge_done)
+{
+ call_frame_t *expunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ goto out;
+ }
- return 0;
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
+
+ local = frame->local;
+ sh = &local->self_heal;
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ loc_copy (&expunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&expunge_sh->parent_loc,
+ &expunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ sh->expunge_done = expunge_done;
+ afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
+ parentbuf);
+ return;
+out:
+ gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
+ local->loc.path, strerror (op_errno));
+ expunge_done (frame, this, child_index, -1, op_errno);
}
+void
+afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
+ int32_t *fresh_children,
+ unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (afr_is_child_present (success_children, child_count, i) &&
+ !afr_is_child_present (fresh_children, child_count, i)) {
+ sh->child_errno[i] = ENOENT;
+ GF_ASSERT (sh->xattr[i]);
+ dict_unref (sh->xattr[i]);
+ sh->xattr[i] = NULL;
+ }
+ }
+}
int
-sh_missing_entries_unlck_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ if (afr_gfid_missing_count (this->name, sh->fresh_children,
+ sh->buf, priv->child_count,
+ local->loc.path)) {
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_missing_entries_lookup_done,
+ sh->sh_gfid_req,
+ AFR_LOOKUP_FAIL_CONFLICTS|
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ } else {
+ //No need to set gfid so goto missing entries lookup done
+ //Behave as if you have done the lookup
+ afr_sh_remove_stale_lookup_info (sh,
+ sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_children_copy (sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
+ }
+ }
+ return 0;
+}
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+gf_boolean_t
+afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
+{
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
+ sh = &local->self_heal;
- if (call_count == 0) {
- afr_sh_missing_entries_done (frame, this);
- }
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- return 0;
+ return _gf_false;
}
-
-static int
-sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ sh = &local->self_heal;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_children, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- call_count = local->child_count;
+ return _gf_false;
+}
- local->call_count = call_count;
+void
+afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t purge_condition (afr_local_t *local,
+ afr_private_t *priv,
+ int child))
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %"PRId64"/%s on subvolume %s",
- sh->parent_loc.inode->ino, local->loc.name,
- priv->children[i]->name);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- STACK_WIND (frame, sh_missing_entries_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ for (i = 0; i < priv->child_count; i++) {
+ if (purge_condition (local, priv, i))
+ call_count++;
+ }
- if (!--call_count)
- break;
- }
- }
- return 0;
-}
+ if (call_count == 0) {
+ sh->post_remove_call (frame, this);
+ goto out;
+ }
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!purge_condition (local, priv, i))
+ continue;
+ gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
+ "on %s", local->loc.path, priv->children[i]->name);
+ afr_sh_call_entry_expunge_remove (frame, this,
+ (long) i, &sh->buf[i],
+ &sh->parentbufs[i],
+ afr_sh_remove_entry_cbk);
+ }
+out:
+ return;
+}
-static int
-sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int op_errno, struct stat *stbuf)
+void
+afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
{
- STACK_DESTROY (frame->root);
- return 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->post_remove_call = afr_sh_missing_entries_finish;
+
+ afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
}
+void
+afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
-static int
-sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- call_frame_t *chown_frame = NULL;
- int call_count = 0;
- int child_index = 0;
- struct stat *buf = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- buf = &sh->buf[sh->source];
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- chown_frame = copy_frame (frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "chown %s to %d %d on subvolume %s",
- local->loc.path, buf->st_uid, buf->st_gid,
- priv->children[child_index]->name);
-
- STACK_WIND (chown_frame, sh_destroy_cbk,
- priv->children[child_index],
- priv->children[child_index]->fops->chown,
- &local->loc,
- buf->st_uid, buf->st_gid);
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ sh->post_remove_call = afr_sh_purge_stale_entries_done;
- call_count = afr_frame_return (frame);
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_is_child_present (sh->fresh_children,
+ priv->child_count, i))
+ continue;
- if (call_count == 0) {
- sh_missing_entries_finish (frame, this);
- }
+ if ((!local->child_up[i]) || sh->child_errno[i] != 0)
+ continue;
- return 0;
+ GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
+ uuid_is_null (sh->buf[i].ia_gfid));
+
+ if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
+ (uuid_compare (sh->buf[i].ia_gfid,
+ sh->entrybuf.ia_gfid)))
+ continue;
+
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
+
+ }
+ afr_sh_purge_entry_common (frame, this,
+ afr_sh_purge_stale_entry_condition);
}
+void
+afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
+ struct iatt *save,
+ unsigned int child_count)
+{
+ int i = 0;
+ int child = 0;
+ gf_boolean_t saved = _gf_false;
-static int
-sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
- dev_t st_dev = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = sh->buf[sh->source].st_mode;
- st_dev = sh->buf[sh->source].st_dev;
-
- gf_log (this->name, GF_LOG_TRACE,
- "mknod %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, st_mode, st_dev);
- if (!--call_count)
- break;
- }
- }
+ GF_ASSERT (save);
+ //if iatt buf with gfid exists sets it
+ for (i = 0; i < child_count; i++) {
+ child = children[i];
+ if (child == -1)
+ break;
+ *save = bufs[child];
+ saved = _gf_true;
+ if (!uuid_is_null (save->ia_gfid))
+ break;
+ }
+ GF_ASSERT (saved);
+}
- return 0;
+void
+afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
+ unsigned int child_count)
+{
+ afr_children_intersection_get (sh->success_children,
+ sh->fresh_parent_dirs,
+ sh->sources, child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, child_count);
+ memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
}
+void
+afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t fresh_child_enoents = 0;
+ int32_t fresh_parent_count = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto fail;
+ afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
+ fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
+ priv->child_count);
+ //we need the enoent count of the subvols present in fresh_parent_dirs
+ fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
+ sh->child_errno,
+ priv->child_count, ENOENT);
+ if (fresh_child_enoents == fresh_parent_count) {
+ afr_sh_set_error (sh, ENOENT);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_purge_entry (frame, this);
+ } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ afr_sh_save_child_iatts_from_policy (sh->fresh_children,
+ sh->buf, &sh->entrybuf,
+ priv->child_count);
+ afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_purge_stale_entry (frame, this);
+ } else {
+ op_errno = EIO;
+ afr_set_local_for_unhealable (local);
+ goto fail;
+ }
-static int
-sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = sh->buf[sh->source].st_mode;
-
- gf_log (this->name, GF_LOG_TRACE,
- "mkdir %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, st_mode);
- if (!--call_count)
- break;
- }
- }
+ return;
- return 0;
+fail:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
+static void
+afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int enoent_count = 0;
+ int nsources = 0;
+ int source = -1;
+ int32_t subvol_status = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto out;
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (enoent_count > 0) {
+ gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
+ " in missing entry self-heal, aborting missing-entry "
+ "self-heal",
+ local->loc.path);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+ }
-static int
-sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
- const char *link)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "symlink %s -> %s on %d subvolumes",
- local->loc.path, link, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- link, &local->loc);
- if (!--call_count)
- break;
- }
- }
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->success_children,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
- return 0;
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (source == -1) {
+ GF_ASSERT (0);
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
+ }
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_parent_dirs, priv->child_count);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_children_lookup_done, NULL, 0,
+ NULL);
+ return;
+
+out:
+ afr_sh_set_error (sh, op_errno);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
+ memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
+ sh->child_errno[i] = 0;
+ }
+ memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
+ sh->success_count = 0;
+ afr_reset_children (sh->success_children, child_count);
+ afr_reset_children (sh->fresh_children, child_count);
+ afr_reset_xattr (sh->xattr, child_count);
+ loc_wipe (&sh->lookup_loc);
+}
-static int
-sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *link)
+/* afr self-heal state will be lost if this call is made
+ * please check the afr_sh_common_reset that is called in this function
+ */
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
+ int32_t flags, dict_t *xdata)
{
- if (op_ret > 0)
- sh_missing_entries_symlink (frame, this, link);
- else
- sh_missing_entries_finish (frame, this);
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr_req = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ local->call_count = call_count;
+
+ xattr_req = dict_new();
+
+ if (xattr_req) {
+ afr_xattr_req_prepare (this, xattr_req, loc->path);
+ if (gfid) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s with gfid: %s",
+ loc->path, uuid_utoa (gfid));
+ GF_ASSERT (!uuid_is_null (gfid));
+ afr_set_dict_gfid (xattr_req, gfid);
+ }
+ }
- return 0;
+ afr_sh_common_reset (sh, priv->child_count);
+ sh->lookup_done = lookup_done;
+ loc_copy (&sh->lookup_loc, loc);
+ sh->lookup_flags = flags;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s on subvolume %s",
+ loc->path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame,
+ afr_sh_common_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ loc, xattr_req);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return 0;
}
-static int
-sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+
+int
+afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
+ xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Non blocking entrylks failed.");
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_done (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks done. Proceeding to FOP");
+ afr_sh_common_lookup (frame, this, &sh->parent_loc,
+ afr_sh_find_fresh_parents,
+ NULL, AFR_LOOKUP_FAIL_CONFLICTS,
+ NULL);
+ }
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ return 0;
+}
- STACK_WIND (frame, sh_missing_entries_readlink_cbk,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readlink,
- &local->loc, 4096);
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- return 0;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+
+ afr_set_lock_number (frame, this);
+
+ int_lock->lk_basename = base_name;
+ int_lock->lk_loc = loc;
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
+
+ int_lock->lockee_count = 0;
+ afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
+ base_name, priv->child_count);
+ int_lock->lockee_count++;
+ afr_nonblocking_entrylk (frame, this);
+
+ return 0;
}
+static int
+afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "attempting to recreate missing entries for path=%s",
+ local->loc.path);
+
+ ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
+ if (ret)
+ goto out;
+
+ afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
+ lock_cbk);
+ return 0;
+out:
+ int_lock = &local->internal_lock;
+ int_lock->lock_op_ret = -1;
+ lock_cbk (frame, this);
+ return 0;
+}
static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- int i = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int govinda_gOvinda = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i]) {
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
- } else {
- if (type) {
- if (type != (sh->buf[i].st_mode & S_IFMT))
- govinda_gOvinda = 1;
- } else {
- sh->source = i;
- type = sh->buf[i].st_mode & S_IFMT;
- }
- }
- }
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- if (govinda_gOvinda) {
- gf_log (this->name, GF_LOG_ERROR,
- "conflicing filetypes exist for path %s. returning.",
- local->loc.path);
+ local = frame->local;
+ sh = &local->self_heal;
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
- if (!type) {
- gf_log (this->name, GF_LOG_ERROR,
- "no source found for %s. all nodes down?. returning.",
- local->loc.path);
- /* subvolumes down and/or file does not exist */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ afr_self_heal_parent_entrylk (frame, this,
+ afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
+ return 0;
+}
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- sh_missing_entries_mknod (frame, this);
- break;
- case S_IFLNK:
- sh_missing_entries_readlink (frame, this);
- break;
- case S_IFDIR:
- sh_missing_entries_mkdir (frame, this);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown file type: 0%o", type);
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- }
+afr_local_t*
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ sh = &l->self_heal;
+
+ lc = mem_get0 (this->local_pool);
+ if (!lc)
+ goto out;
+
+ shc = &lc->self_heal;
+
+ shc->unwind = sh->unwind;
+ shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
+ shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
+ shc->do_gfid_self_heal = sh->do_gfid_self_heal;
+ shc->do_data_self_heal = sh->do_data_self_heal;
+ shc->do_metadata_self_heal = sh->do_metadata_self_heal;
+ shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
+ shc->forced_merge = sh->forced_merge;
+ shc->background = sh->background;
+ shc->type = sh->type;
+ shc->data_sh_info = "";
+ shc->metadata_sh_info = "";
+
+ uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
- return 0;
+ lc->child_up = memdup (l->child_up,
+ sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
+
+ if (l->xattr_req)
+ lc->xattr_req = dict_ref (l->xattr_req);
+
+ if (l->cont.lookup.inode)
+ lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
+ if (l->cont.lookup.xattr)
+ lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
+
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
+
+out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
+ return lc;
}
+int
+afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_local_t * orig_frame_local = NULL;
+ afr_self_heal_t * orig_frame_sh = NULL;
+ char sh_type_str[256] = {0,};
+ gf_loglevel_t loglevel = 0;
+
+ priv = this->private;
+ local = bgsh_frame->local;
+ sh = &local->self_heal;
+
+ if (local->unhealable) {
+ afr_set_split_brain (this, sh->inode, SPB, SPB);
+ }
-static int
-sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->st_mode);
-
- local->self_heal.buf[child_index] = *buf;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- local->self_heal.child_errno[child_index] = op_errno;
- }
+ afr_self_heal_type_str_get (sh, sh_type_str,
+ sizeof(sh_type_str));
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
+ loglevel = GF_LOG_ERROR;
+ } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) {
+ loglevel = GF_LOG_INFO;
+ } else {
+ loglevel = GF_LOG_DEBUG;
+ }
- }
- UNLOCK (&frame->lock);
+ afr_log_self_heal_completion_status (local, loglevel);
- call_count = afr_frame_return (frame);
+ FRAME_SU_UNDO (bgsh_frame, afr_local_t);
- if (call_count == 0) {
- sh_missing_entries_create (frame, this);
- }
+ if (!sh->unwound && sh->unwind) {
+ orig_frame_local = sh->orig_frame->local;
+ orig_frame_sh = &orig_frame_local->self_heal;
+ orig_frame_sh->actual_sh_started = _gf_true;
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
+ }
- return 0;
+ if (sh->background) {
+ LOCK (&priv->lock);
+ {
+ priv->background_self_heals_started--;
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ AFR_STACK_DESTROY (bgsh_frame);
+
+ return 0;
}
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+ afr_self_heal_t *orig_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ orig_sh = &local->self_heal;
+ priv = this->private;
+
+ GF_ASSERT (local->loc.path);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "performing self heal on %s (metadata=%d data=%d entry=%d)",
+ local->loc.path,
+ local->self_heal.do_metadata_self_heal,
+ local->self_heal.do_data_self_heal,
+ local->self_heal.do_entry_self_heal);
+
+ op_errno = ENOMEM;
+ sh_frame = copy_frame (frame);
+ if (!sh_frame)
+ goto out;
+ afr_set_lk_owner (sh_frame, this, sh_frame->root);
+ afr_set_low_priority (sh_frame);
+
+ sh_local = afr_self_heal_local_init (local, this);
+ if (!sh_local)
+ goto out;
+ sh_frame->local = sh_local;
+ sh = &sh_local->self_heal;
+
+ sh->inode = inode_ref (inode);
+ sh->orig_frame = frame;
+
+ sh->completion_cbk = afr_self_heal_completion_cbk;
+
+ sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
+ gf_afr_mt_char);
+ if (!sh->success)
+ goto out;
+ sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
+ gf_afr_mt_int);
+ if (!sh->sources)
+ goto out;
+ sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
+ priv->child_count,
+ gf_afr_mt_int);
+ if (!sh->locked_nodes)
+ goto out;
+
+ sh->pending_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->pending_matrix)
+ goto out;
+
+ sh->delta_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->delta_matrix)
+ goto out;
+
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
+ if (!sh->fresh_parent_dirs)
+ goto out;
+ ret = afr_sh_common_create (sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
-static int
-sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = -1;
-
- local = frame->local;
- call_count = local->child_count;
- priv = this->private;
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
+ if (local->self_heal.background) {
+ LOCK (&priv->lock);
+ {
+ if (priv->background_self_heals_started
+ < priv->background_self_heal_count) {
+ priv->background_self_heals_started++;
+
+
+ } else {
+ local->self_heal.background = _gf_false;
+ sh->background = _gf_false;
+ }
}
+ UNLOCK (&priv->lock);
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ if (!local->loc.parent) {
+ sh->do_missing_entry_self_heal = _gf_false;
+ sh->do_gfid_self_heal = _gf_false;
+ }
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ FRAME_SU_DO (sh_frame, afr_local_t);
+ if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+ afr_self_heal_missing_entries (sh_frame, this);
+ } else {
+ loc = &sh_local->loc;
+ if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
+ if (!uuid_is_null (inode->gfid))
+ GF_ASSERT (!uuid_compare (inode->gfid,
+ sh->sh_gfid_req));
+ uuid_copy (loc->gfid, sh->sh_gfid_req);
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to metadata check on %s",
+ local->loc.path);
- return 0;
+ afr_sh_missing_entries_done (sh_frame, this);
+ }
+ op_errno = 0;
+
+out:
+ if (op_errno) {
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
+ if (sh_frame)
+ AFR_STACK_DESTROY (sh_frame);
+ }
+ return 0;
}
+void
+afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
+ size_t size)
+{
+ GF_ASSERT (str && (size > strlen (" missing-entry gfid "
+ "meta-data data entry")));
-static int
-sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ if (self_heal_p->do_metadata_self_heal) {
+ snprintf (str, size, " meta-data");
+ }
- call_count = afr_frame_return (frame);
+ if (self_heal_p->do_data_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " data");
+ }
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ if (self_heal_p->do_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " entry");
+ }
- sh_missing_entries_lookup (frame, this);
- }
+ if (self_heal_p->do_missing_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str),
+ " missing-entry");
+ }
- return 0;
+ if (self_heal_p->do_gfid_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " gfid");
+ }
}
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type)
+{
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_DATA;
+ break;
+ case AFR_METADATA_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_METADATA;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_ENTRY;
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ GF_ASSERT (0);
+ break;
+ }
+ return sh_type;
+}
-static int
-afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
+ int ret = -1;
+ uuid_t pargfid = {0};
+ if (!child)
+ goto out;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ if (uuid_is_null (pargfid))
+ goto out;
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ if (strcmp (parent->path, "/") == 0)
+ ret = gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+ name);
- call_count = local->child_count;
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting child path");
+ }
- local->call_count = call_count;
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, sh_missing_entries_lk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
- return 0;
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if ((ret == -1) && child)
+ loc_wipe (child);
+
+ return ret;
}
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t **erase_xattr = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
+ sh->success, priv->child_count, type);
+
+ erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!erase_xattr)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ call_count++;
+ erase_xattr[i] = dict_new ();
+ if (!erase_xattr[i])
+ goto out;
+ }
+ }
+
+ afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
+ priv->child_count, type);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ local->call_count = call_count;
+ if (call_count == 0) {
+ ret = 0;
+ finish (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!erase_xattr[i])
+ continue;
+
+ if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ }
+ }
+
+ ret = 0;
+out:
+ if (erase_xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (erase_xattr[i]) {
+ dict_unref (erase_xattr[i]);
+ }
+ }
+ }
+
+ GF_FREE (erase_xattr);
+
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ finish (frame, this);
+ }
+
+ return 0;
+}
+
+void
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
+{
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
+
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
+}
+
+void
+afr_set_local_for_unhealable (afr_local_t *local)
+{
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ local->unhealable = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+}
int
-afr_self_heal (call_frame_t *frame, xlator_t *this,
- int (*completion_cbk) (call_frame_t *, xlator_t *))
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->need_metadata_self_heal,
- local->need_data_self_heal,
- local->need_entry_self_heal);
-
- sh->completion_cbk = completion_cbk;
-
- sh->buf = CALLOC (priv->child_count, sizeof (struct stat));
- sh->child_errno = CALLOC (priv->child_count, sizeof (int));
- sh->success = CALLOC (priv->child_count, sizeof (int));
- sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *));
- sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count);
-
- sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = CALLOC (sizeof (int32_t),
- priv->child_count);
- }
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
+{
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
- sh->delta_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = CALLOC (sizeof (int32_t),
- priv->child_count);
- }
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
- if (local->success_count && local->enoent_count) {
- afr_self_heal_missing_entries (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_sh_missing_entries_done (frame, this);
- }
+ }
- return 0;
+out:
+ return sh_failed;
+}
+
+char *
+get_sh_completion_status (afr_self_heal_status status)
+{
+
+ char *not_attempted = " is not attempted";
+ char *failed = " failed";
+ char *started = " is started";
+ char *sync_begin = " is successfully completed";
+ char *result = " has unknown status";
+
+ switch (status)
+ {
+ case AFR_SELF_HEAL_NOT_ATTEMPTED:
+ result = not_attempted;
+ break;
+ case AFR_SELF_HEAL_FAILED:
+ result = failed;
+ break;
+ case AFR_SELF_HEAL_STARTED:
+ result = started;
+ break;
+ case AFR_SELF_HEAL_SYNC_BEGIN:
+ result = sync_begin;
+ break;
+ }
+
+ return result;
+
+}
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl)
+{
+
+ char sh_log[4096] = {0};
+ afr_self_heal_t *sh = &local->self_heal;
+ afr_sh_status_for_all_type all_status = sh->afr_all_sh_status;
+ xlator_t *this = NULL;
+ size_t off = 0;
+ int data_sh = 0;
+ int metadata_sh = 0;
+ int print_log = 0;
+
+ this = THIS;
+
+ ADD_FMT_STRING (sh_log, off, "gfid or missing entry",
+ all_status.gfid_or_missing_entry_self_heal, print_log);
+ ADD_FMT_STRING_SYNC (sh_log, off, "metadata",
+ all_status.metadata_self_heal, print_log);
+ if (sh->background) {
+ ADD_FMT_STRING_SYNC (sh_log, off, "backgroung data",
+ all_status.data_self_heal, print_log);
+ } else {
+ ADD_FMT_STRING_SYNC (sh_log, off, "foreground data",
+ all_status.data_self_heal, print_log);
+ }
+ ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal,
+ print_log);
+
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal &&
+ strcmp (sh->data_sh_info, "") && sh->data_sh_info )
+ data_sh = 1;
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal &&
+ strcmp (sh->metadata_sh_info, "") && sh->metadata_sh_info)
+ metadata_sh = 1;
+
+ if (!print_log)
+ return;
+
+ gf_log (this->name, loglvl, "%s %s %s on %s", sh_log,
+ ((data_sh == 1) ? sh->data_sh_info : ""),
+ ((metadata_sh == 1) ? sh->metadata_sh_info : ""),
+ local->loc.path);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index a311cdf5e..473264776 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -1,70 +1,144 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_COMMON_H__
#define __AFR_SELF_HEAL_COMMON_H__
-#define FILE_HAS_HOLES(buf) (((buf)->st_size) > ((buf)->st_blocks * 512))
+#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
+#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
-} afr_self_heal_type;
+ AFR_LOOKUP_FAIL_CONFLICTS = 1,
+ AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
+} afr_lookup_flags_t;
int
afr_sh_select_source (int sources[], int child_count);
int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
afr_sh_source_count (int sources[], int child_count);
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count);
-
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc);
+
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count);
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type);
int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type);
+void
+afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
+ size_t size);
+
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type);
+
int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc);
+
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
+ int32_t flags, dict_t *xdata);
+int
+afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf);
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk);
+int
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk);
+afr_local_t *
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler);
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
+typedef int
+(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame);
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this);
+
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count);
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs);
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this));
+
+void
+afr_set_local_for_unhealable (afr_local_t *local);
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
+
+void
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 06372d47d..9de26ee56 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -47,1044 +38,1717 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#include "afr-self-heal-algorithm.h"
+
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
+static inline gf_boolean_t
+afr_sh_data_proceed (unsigned int success_count)
+{
+ return (success_count >= AFR_SH_MIN_PARTICIPANTS);
+}
+extern int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
- /*
- TODO: cleanup sh->*
- */
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
+int
+afr_sh_data_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- sh->completion_cbk (frame, this);
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->completion_cbk (frame, this);
- return 0;
+ return 0;
}
int
afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "flush failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- afr_sh_data_done (frame, this);
- }
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_data_utimes_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_data_close (call_frame_t *frame, xlator_t *this)
{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (!sh->healing_fd) {
+ //This happens when file is non-reg
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
+ local->call_count = call_count;
+
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i])
+ continue;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "closing fd of %s on %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ sh->healing_fd, NULL);
+
+ if (!--call_count)
+ break;
+ }
return 0;
}
-
int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
- int source = 0;
- int active_sinks = 0;
-
- struct timespec ts[2];
-
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
- active_sinks = sh->active_sinks;
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = sh->buf[source].st_atim;
- ts[1] = sh->buf[source].st_mtim;
-
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = sh->buf[source].st_atimespec;
- ts[1] = sh->buf[source].st_mtimespec;
-#else
- ts[0].tv_sec = sh->buf[source].st_atime;
- ts[1].tv_sec = sh->buf[source].st_mtime;
-#endif
+ if (sh->sh_dom_lock_held)
+ afr_sh_data_unlock (frame, this, priv->sh_domain,
+ afr_sh_data_close);
+ else
+ afr_sh_data_close (frame, this);
+ return 0;
+}
- if (!sh->healing_fd) {
- afr_sh_data_done (frame, this);
- return 0;
- }
+int
+afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
- call_count = (sh->active_sinks + 1) * 2;
- local->call_count = call_count;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
- /* closed source */
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[sh->source]->name);
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ }
+ UNLOCK (&frame->lock);
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->flush,
- sh->healing_fd);
- call_count--;
+ call_count = afr_frame_return (frame);
- STACK_WIND_COOKIE (frame, afr_sh_data_utimes_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->utimens,
- &local->loc, ts);
-
- call_count--;
+ if (call_count == 0) {
+ afr_sh_data_finish (frame, this);
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
+ return 0;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
+int
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t valid = 0;
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- call_count--;
+ valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
- STACK_WIND_COOKIE (frame, afr_sh_data_utimes_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc, ts);
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
+ local->call_count = call_count;
- if (!--call_count)
- break;
- }
+ if (call_count == 0) {
+ GF_ASSERT (0);
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i])
+ continue;
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc, stbuf, valid, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int child_index = (long) cookie;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->source == child_index);
+ if (op_ret != -1) {
+ sh->buf[child_index] = *buf;
+ afr_sh_data_setattr (frame, this, buf);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "time-stamps after self-heal", local->loc.path);
+ afr_sh_data_fail (frame, this);
+ }
- if (call_count == 0) {
- afr_sh_data_close (frame, this);
- }
+ return 0;
+}
- return 0;
+/*
+ * If there are any writes after the self-heal is triggered then the
+ * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
+ * stat on the source and then set the [am]times
+ */
+int
+afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
+ (void *) (long) sh->source,
+ priv->children[sh->source],
+ priv->children[sh->source]->fops->fstat,
+ sh->healing_fd, NULL);
+ return 0;
}
+//Fun fact, lock_cbk is being used for both lock & unlock
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else if (strcmp (dom, priv->sh_domain) == 0) {
+ sh->sh_dom_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = dom;
+ afr_unlock (frame, this);
+
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
+ return 0;
+}
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
- struct flock flock;
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finishing data selfheal of %s", local->loc.path);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
+ else
+ afr_sh_dom_unlock (frame, this);
- call_count = local->child_count;
+ return 0;
+}
- local->call_count = call_count;
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
+ local = frame->local;
+ sh = &local->self_heal;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
- if (!--call_count)
- break;
- }
- }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finishing failed data selfheal of %s", local->loc.path);
- return 0;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_finish (frame, this);
+ return 0;
}
-
int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
+afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child_index = (long) cookie;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
+ "log failed on %s for subvol %s, reason: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "finishing data selfheal of %s", local->loc.path);
+ call_count = afr_frame_return (frame);
- afr_sh_data_unlock (frame, this);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ if (!IA_ISREG (sh->type)) {
+ afr_sh_data_finish (frame, this);
+ goto out;
+ }
+ GF_ASSERT (sh->old_loop_frame);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_post_sh_big_lock_success,
+ afr_post_sh_big_lock_failure);
+ }
+out:
+ return 0;
+}
- return 0;
+int
+afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+ return 0;
}
+int
+afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to fsync on "
+ "%s - %s", local->loc.path,
+ priv->children[child_index]->name, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ }
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_erase_pending (frame, this);
+ }
+ return 0;
+}
+/*
+ * Before erasing xattrs, make sure the data is written to disk
+ */
int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+afr_sh_data_fsync (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ call_count = sh->active_sinks;
+ if (call_count == 0) {
+ afr_sh_data_erase_pending (frame, this);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i] || sh->sources[i])
+ continue;
- if (call_count == 0)
- afr_sh_data_finish (frame, this);
+ STACK_WIND_COOKIE (frame, afr_sh_data_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync,
+ sh->healing_fd, 1, NULL);
+ }
- return 0;
+ return 0;
}
-
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+static struct afr_sh_algorithm *
+sh_algo_from_name (xlator_t *this, char *name)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
+ int i = 0;
+ if (name == NULL)
+ goto out;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ while (afr_self_heal_algorithms[i].name) {
+ if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
+ return &afr_self_heal_algorithms[i];
+ }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
+ i++;
+ }
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
+out:
+ return NULL;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
+static int
+sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+ for (i = 0; i < child_count; i++) {
+ if (!local->child_up[i] || sh->child_errno[i])
+ continue;
+ if (sh->buf[i].ia_size == 0) {
+ ret = 1;
+ break;
+ }
+ }
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ return ret;
+}
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
+struct afr_sh_algorithm *
+afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ struct afr_sh_algorithm * algo = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+
+ if (algo == NULL) {
+ /* option not set, so fall back on heuristics */
+
+ if (sh_zero_byte_files_exist (local, priv->child_count)
+ || (sh->file_size <= (priv->data_self_heal_window_size *
+ this->ctx->page_size))) {
+
+ /*
+ * If the file does not exist on one of the subvolumes,
+ * or a zero-byte file exists (created by entry self-heal)
+ * the entire content has to be copied anyway, so there
+ * is no benefit from using the "diff" algorithm.
+ *
+ * If the file size is about the same as page size,
+ * the entire file can be read and written with a few
+ * (pipelined) STACK_WINDs, which will be faster
+ * than "diff" which has to read checksums and then
+ * read and write.
+ */
+
+ algo = sh_algo_from_name (this, "full");
+
+ } else {
+ algo = sh_algo_from_name (this, "diff");
+ }
+ }
- return 0;
+ return algo;
}
int
+afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ struct afr_sh_algorithm *sh_algo = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->algo_completion_cbk = afr_sh_data_fsync;
+ sh->algo_abort_cbk = afr_sh_data_fail;
+
+ sh_algo = afr_sh_data_pick_algo (frame, this);
+
+ sh->algo = sh_algo;
+ sh_algo->fn (frame, this);
+
+ return 0;
+}
+
+int
afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
+ int call_count = 0;
+ int child_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
- }
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ftruncate of %s on subvolume %s completed",
+ local->loc.path,
+ priv->children[child_index]->name);
+ }
+ }
+ UNLOCK (&frame->lock);
- return 0;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_sync_prepare (frame, this);
+ }
+
+ return 0;
}
int
afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int *sources = NULL;
+ int call_count = 0;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- sources = sh->sources;
- call_count = sh->active_sinks;
+ sources = sh->sources;
+ call_count = sh->active_sinks;
- local->call_count = call_count;
+ local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] || !local->child_up[i])
+ continue;
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->ftruncate,
+ sh->healing_fd, sh->file_size,
+ NULL);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset - op_ret);
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (sh->source < 0) {
+ ret = -1;
+ goto out;
+ }
- call_count = afr_frame_return (frame);
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == sh->source || sh->child_errno[i])
+ continue;
- if (call_count == 0) {
- afr_sh_data_read_write_iter (frame, this);
- }
+ if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
+ sh->sources[i] = 0;
+ }
- return 0;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+out:
+ return ret;
}
-
-int
-afr_sh_data_read_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *buf,
- struct iobref *iobref)
+char*
+afr_get_sizes_str (afr_local_t *local, struct iatt *bufs, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sizes_str = NULL;
+ size_t off = 0;
+ char *fmt_str = "%llu bytes on %s, ";
+ char *child_down = " %s,";
+ char *child_unknown = " %s,";
+ int down_child_present = 0;
+ int down_count = 0;
+ int unknown_count = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is ";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ len += snprintf (num, sizeof (num), fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), child_down,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count ++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), child_unknown,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
- int child_index = (long) cookie;
- int i = 0;
- int call_count = 0;
+ }
- off_t offset;
+ if (down_child_present) {
+ if (down_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ len++;//for '\0'
- call_count = sh->active_sinks;
+ sizes_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- local->call_count = call_count;
+ if (!sizes_str)
+ return NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s on child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ off += snprintf (sizes_str + off, len - off, fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ }
+ }
- if (op_ret <= 0) {
- afr_sh_data_trim_sinks (frame, this);
- return 0;
- }
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
- /* what if we read less than block size? */
- offset = sh->offset;
- sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- afr_sh_data_read_write_iter (frame, this);
- goto out;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0) {
+ off += snprintf (sizes_str + off, len - off, child_down,
+ priv->children[i]->name);
+ }
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
- STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
-out:
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1) {
+ off += snprintf (sizes_str + off, len - off,
+ child_unknown,
+ priv->children[i]->name);
+ }
+ }
-int
-afr_sh_data_read_write (call_frame_t *frame, xlator_t *this)
+ return sizes_str;
+}
+
+char*
+afr_get_sinks_str (xlator_t *this, afr_local_t *local, afr_self_heal_t *sh)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- sh->offset);
-
- return 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sinks_str = NULL;
+ char *temp_str = " to sinks ";
+ char *str_format = " %s,";
+ char off = 0;
+
+ priv = this->private;
+
+ len += snprintf (num, sizeof (num), "%s", temp_str);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), str_format,
+ priv->children[i]->name);
+ }
+ }
+
+ len ++;
+
+ sinks_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+
+ if (!sinks_str)
+ return NULL;
+
+ off += snprintf (sinks_str + off, len - off, "%s", temp_str);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ off += snprintf (sinks_str + off, len - off,
+ str_format,
+ priv->children[i]->name);
+ }
+ }
+
+ return sinks_str;
+
}
-int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this)
+void
+afr_set_data_sh_info_str (afr_local_t *local, afr_self_heal_t *sh, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
+ char *pending_matrix_str = NULL;
+ char *sizes_str = NULL;
+ char *sinks_str = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
- if (sh->offset >= sh->file_size) {
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd's of %s",
- local->loc.path);
- afr_sh_data_trim_sinks (frame, this);
+ sizes_str = afr_get_sizes_str (local, sh->buf, this);
+ if (!sizes_str)
+ sizes_str = "";
- goto out;
- }
+ sinks_str = afr_get_sinks_str (this, local, sh);
+ if (!sinks_str)
+ sinks_str = "";
- afr_sh_data_read_write (frame, this);
+ gf_asprintf (&sh->data_sh_info, " data self heal from %s %s with "
+ "%s data %s", priv->children[sh->source]->name, sinks_str,
+ sizes_str, pending_matrix_str);
-out:
- return 0;
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (sizes_str && strcmp (sizes_str, ""))
+ GF_FREE (sizes_str);
}
+void
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
+ int source = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+ sh->block_size = this->ctx->page_size;
+ sh->file_size = sh->buf[source].ia_size;
+
+ if (FILE_HAS_HOLES (&sh->buf[source]))
+ sh->file_has_holes = 1;
+
+ if (sh->background && sh->unwind && !sh->unwound) {
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
+ sh->unwound = _gf_true;
+ }
+
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no active sinks for performing self-heal on file %s",
+ local->loc.path);
+ afr_sh_data_finish (frame, this);
+ return;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing file %s from subvolume %s to %d other",
+ local->loc.path, priv->children[sh->source]->name,
+ sh->active_sinks);
+
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_data_trim_sinks (frame, this);
+}
int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int ret = 0;
+ int *old_sources = NULL;
+ int tstamp_source = 0;
+ int i = 0;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ if (sh->sync_done) {
+ //store sources before sync so that mtime can be set using the
+ //iatt buf from one of them.
+ old_sources = alloca (priv->child_count*sizeof (*old_sources));
+ memcpy (old_sources, sh->sources,
+ priv->child_count * sizeof (*old_sources));
+ }
- call_count = afr_frame_return (frame);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION, NULL, _gf_true);
+ if ((nsources == -1)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Picking favorite child %s as authentic source to "
+ "resolve conflicting data of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
- gf_log (this->name, GF_LOG_TRACE,
- "sourcing file %s from %s to other sinks",
- local->loc.path, priv->children[sh->source]->name);
+ sh->sources[priv->favorite_child] = 1;
- afr_sh_data_read_write (frame, this);
- }
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
+ }
- return 0;
-}
+ if (nsources == -1) {
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB);
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
+
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No active sources found.");
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+ if (sh->sync_done) {
+ /* Perform setattr from one of the old_sources if possible
+ * Because only they have the correct mtime, the new sources
+ * (i.e. old sinks) have mtime from last writev in sync.
+ */
+ tstamp_source = sh->source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_sources[i] && sh->sources[i])
+ tstamp_source = i;
+ }
+ afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
+ } else {
+ afr_set_data_sh_info_str (local, sh, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
+ }
+ return 0;
+}
int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid)
{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
+ afr_private_t *priv = NULL;
+ int read_child = -1;
+ int32_t **pending_matrix = NULL;
+ int32_t *sources = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int32_t nsources = 0;
+ int32_t prev_read_child = -1;
+ int32_t config_read_child = -1;
+ int32_t subvol_status = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ pending_matrix = local->cont.lookup.pending_matrix;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+
+ nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
+ local->loc.path);
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ local->cont.lookup.possible_spb = _gf_true;
+ nsources = 1;
+ sources[success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child (this,
+ success_children,
+ bufs);
+ sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ if (nsources < 0)
+ goto out;
+
+ prev_read_child = local->read_child_index;
+ config_read_child = priv->read_child;
+ read_child = afr_select_read_child_from_policy (success_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ sources,
+ priv->hash_mode, gfid);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
+ read_child);
+ return read_child;
+}
- fd_t *fd = NULL;
+int
+afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fstat of %s on %s succeeded",
+ local->loc.path,
+ priv->children[child_index]->name);
+
+ sh->buf[child_index] = *buf;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
+ "on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ /* Previous versions of glusterfs might have set
+ * the pending data xattrs which need to be erased
+ */
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
+ "succeeded on < %d children, aborting "
+ "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
+ local->loc.path);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fxattrop_fstat_done (frame, this);
+ }
+out:
+ return 0;
+}
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- call_count = sh->active_sinks + 1;
- local->call_count = call_count;
+int
+afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int child = 0;
+ int32_t *fstat_children = NULL;
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- source = local->self_heal.source;
- sources = local->self_heal.sources;
+ fstat_children = memdup (sh->success_children,
+ sizeof (*fstat_children) * priv->child_count);
+ if (!fstat_children) {
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ call_count = sh->success_count;
+ local->call_count = call_count;
+
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ sh->success_count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ child = fstat_children[i];
+ if (child == -1)
+ break;
+ STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
+ (void *) (long) child,
+ priv->children[child],
+ priv->children[child]->fops->fstat,
+ sh->healing_fd, NULL);
+ --call_count;
+ }
+ GF_ASSERT (!call_count);
+out:
+ GF_FREE (fstat_children);
+ return 0;
+}
- sh->block_size = 65536;
- sh->file_size = sh->buf[source].st_size;
+void
+afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int child_index = (long) cookie;
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fxattrop of %s on %s succeeded",
+ local->loc.path,
+ priv->children[child_index]->name);
+
+ sh->xattr[child_index] = dict_ref (xattr);
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
+ "failed on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+}
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->open,
- &local->loc, O_RDONLY|O_LARGEFILE, fd);
- call_count--;
+int
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_WRONLY|O_LARGEFILE, fd);
-
- if (!--call_count)
- break;
- }
+ local = frame->local;
+ sh = &local->self_heal;
- return 0;
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
+ "change log succeeded on < %d children",
+ local->loc.path, AFR_SH_MIN_PARTICIPANTS);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fstat (frame, this);
+ }
+out:
+ return 0;
}
int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr_req;
+ int32_t *zero_pending = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ int j;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
- source = sh->source;
+ local->call_count = call_count;
+
+ xattr_req = GF_CALLOC(priv->child_count, sizeof(struct dict_t *),
+ gf_afr_mt_dict_t);
+ if (!xattr_req)
+ goto out;
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
+ xattr_req[i] = dict_new();
+ if (!xattr_req[i]) {
+ ret = -1;
+ goto out;
}
}
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
+ gf_afr_mt_int32_t);
+ if (!zero_pending) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynptr (xattr_req[i], priv->pending_key[j],
+ zero_pending,
+ 3 * sizeof (*zero_pending));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value");
+ goto out;
+ } else {
+ zero_pending = NULL;
+ }
+ }
}
- sh->active_sinks = active_sinks;
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
+ afr_reset_xattr (sh->xattr, priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ sh->success_count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd, GF_XATTROP_ADD_ARRAY,
+ xattr_req[i], NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+out:
+ if (xattr_req) {
+ for (i = 0; i < priv->child_count; i++)
+ if (xattr_req[i])
+ dict_unref(xattr_req[i]);
+ GF_FREE(xattr_req);
+ }
- afr_sh_data_open (frame, this);
+ if (ret) {
+ GF_FREE (zero_pending);
+ afr_sh_data_fail (frame, this);
+ }
- return 0;
+ return 0;
}
+int
+afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ sh->sh_dom_lock_held = _gf_true;
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
+ return 0;
+}
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+int
+afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_DATA);
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
+ sh->data_lock_failure_handler (frame, this);
+ } else {
- afr_sh_data_finish (frame, this);
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
+ "done for %s by %s. Proceding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_success_handler (frame, this);
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+int
+afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- sh->sources[priv->favorite_child] = 1;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible split-brain). "
- "Please delete the file from all but the preferred "
- "subvolume.", local->loc.path);
+ if (!sh->data_lock_block) {
+ sh->data_lock_failure_handler(frame, this);
+ } else {
+ int_lock->lock_cbk =
+ afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ }
+ } else {
- local->govinda_gOvinda = 1;
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "done for %s by %s. Proceeding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ sh->data_lock_success_handler (frame, this);
+ }
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ return 0;
+}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
+int
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ local = frame->local;
+ int_lock = &local->internal_lock;
- sh->source = source;
- local->cont.lookup.buf.st_size = sh->buf[source].st_size;
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
+ afr_set_lock_number (frame, this);
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
+ int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
- afr_sh_data_sync_prepare (frame, this);
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
- return 0;
-}
+ afr_nonblocking_inodelk (frame, this);
+ return 0;
+}
int
-afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ sh->data_lock_held = _gf_true;
+ sh->sync_done = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- if (call_count == 0) {
- afr_sh_data_fix (frame, this);
- }
+ local = frame->local;
+ sh = &local->self_heal;
- return 0;
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_set_timestamps (frame, this);
+ return 0;
}
int
-afr_sh_data_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block,
+ char *dom, afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
+ local = frame->local;
+ sh = &local->self_heal;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ sh->data_lock_success_handler = success_handler;
+ sh->data_lock_failure_handler = failure_handler;
+ sh->data_lock_block = block;
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
+}
- call_count = local->child_count;
+int
+afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ /* TODO: some of the open's might fail.
+ In that case, modify cleanup fn to send flush on those
+ fd's which are already open */
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "open of %s failed on child %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "open of %s succeeded on child %s",
+ local->loc.path,
+ priv->children[child_index]->name);
}
}
+ UNLOCK (&frame->lock);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ call_count = afr_frame_return (frame);
- return 0;
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "fd for %s opened, commencing sync",
+ local->loc.path);
+
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+ afr_sh_dom_lock_success, afr_sh_data_fail);
+ }
+
+ return 0;
}
int
-afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_open (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ int i = 0;
+ int call_count = 0;
+ fd_t *fd = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- afr_sh_data_lookup (frame, this);
- }
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+ local->call_count = call_count;
- return 0;
-}
+ fd = fd_create (local->loc.inode, frame->root->pid);
+ sh->healing_fd = fd;
+ /* open sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if(!local->child_up[i])
+ continue;
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
-{
- struct flock flock;
- int i = 0;
- int call_count = 0;
+ STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ &local->loc,
+ O_RDWR|O_LARGEFILE, fd, NULL);
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+void
+afr_sh_non_reg_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ if (op_ret < 0) {
+ afr_sh_data_fail (frame, this);
+ return;
+ }
- call_count = local->child_count;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local->call_count = call_count;
+ for (i = 0; i < priv->child_count ; i++) {
+ if (1 == local->child_up[i])
+ sh->success[i] = 1;
+ }
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
- if (!--call_count)
- break;
- }
- }
+int
+afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- return 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->data_lock_held = _gf_true;
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_non_reg_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ return 0;
}
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_data (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = this->private;
+ int ret = -1;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
+
+ if (afr_can_start_data_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+ priv->sh_domain, priv->child_count);
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
- local = frame->local;
- sh = &local->self_heal;
-
- if (local->need_data_self_heal && priv->data_self_heal) {
- afr_sh_data_lock (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
+ if (IA_ISREG (sh->type)) {
+ afr_sh_data_open (frame, this);
+ } else {
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
+ afr_sh_non_reg_lock_success,
+ afr_sh_data_fail);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "not doing data self heal on %s",
+ local->loc.path);
+ afr_sh_data_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a5e698cd2..53491a1d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -30,6 +21,7 @@
#endif
#include "glusterfs.h"
+#include "inode.h"
#include "afr.h"
#include "dict.h"
#include "xlator.h"
@@ -48,2053 +40,2331 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
+ do {\
+ _local = _frame->local;\
+ _sh = &_local->self_heal;\
+ _sh_frame = _sh->sh_frame;\
+ _sh_local = _sh_frame->local;\
+ _sh_sh = &_sh_local->self_heal;\
+ } while (0);
-
+int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- return 0;
-}
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->completion_cbk (frame, this);
-int
-afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocked inode of %s on child %d",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- afr_sh_entry_done (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ int_lock->lock_cbk = afr_sh_entry_done;
+ afr_unlock (frame, this);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
+ gf_log (this->name, GF_LOG_TRACE,
+ "finishing entry selfheal of %s", local->loc.path);
- afr_sh_entry_unlock (frame, this);
+ afr_sh_entry_unlock (frame, this);
- return 0;
+ return 0;
}
int
afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ long i = 0;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *orig_local = NULL;
+ call_frame_t *orig_frame = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to erase pending xattrs on %s (%s)",
+ local->loc.path, priv->children[i]->name,
+ strerror (op_errno));
+ }
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_entry_finish (frame, this);
+ if (call_count == 0) {
+ if (sh->source == -1) {
+ //this happens if the forced merge option is set
+ read_child = sh->fresh_children[0];
+ } else {
+ read_child = sh->source;
+ }
+ afr_inode_set_read_ctx (this, sh->inode, read_child,
+ sh->fresh_children);
+ orig_frame = sh->orig_frame;
+ orig_local = orig_frame->local;
+
+ if (sh->source != -1) {
+ orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
+ }
- return 0;
+ afr_sh_entry_finish (frame, this);
+ }
+
+ return 0;
}
int
afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (sh->entries_skipped) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ goto out;
+ }
+ afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
+ afr_sh_entry_erase_pending_cbk,
+ afr_sh_entry_finish);
+ return 0;
+out:
+ afr_sh_entry_finish (frame, this);
+ return 0;
}
static int
next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
+ int current_active_source)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int source = -1;
+ int next_active_source = -1;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ source = sh->source;
+ if (source != -1) {
+ if (current_active_source != source)
+ next_active_source = source;
+ goto out;
+ }
-static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
+ /*
+ the next active sink becomes the source for the
+ 'conservative decision' of merging all entries
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0)
+ && (local->child_up[i] == 1)
+ && (i > current_active_source)) {
-int
-build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
+ next_active_source = i;
+ break;
+ }
+ }
+out:
+ return next_active_source;
+}
- if (!child) {
- goto out;
- }
- if (strcmp (parent->path, "/") == 0)
- asprintf ((char **)&child->path, "/%s", name);
- else
- asprintf ((char **)&child->path, "%s/%s", parent->path, name);
- if (!child->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+static int
+next_active_sink (call_frame_t *frame, xlator_t *this,
+ int current_active_sink)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int next_active_sink = -1;
+ int i = 0;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ /*
+ the next active sink becomes the source for the
+ 'conservative decision' of merging all entries
+ */
- if (!child->inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0)
+ && (local->child_up[i] == 1)
+ && (i > current_active_sink)) {
- ret = 0;
-out:
- if (ret == -1)
- loc_wipe (child);
+ next_active_sink = i;
+ break;
+ }
+ }
- return ret;
+ return next_active_sink;
}
+int
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
+ int active_src);
int
afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int active_src, int32_t op_ret,
+ int32_t op_errno)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ int call_count = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ call_count = afr_frame_return (frame);
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ if (call_count == 0)
+ afr_sh_entry_expunge_subvol (frame, this, active_src);
- call_count = afr_frame_return (frame);
+ return 0;
+}
+
+int
+afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int active_src = (long) cookie;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setattr on parent directory of %s on subvolume %s failed: %s",
+ expunge_local->loc.path,
+ priv->children[active_src]->name, strerror (op_errno));
+ }
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- call_frame_t *frame = NULL;
-
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int active_src = 0;
+ int32_t valid = 0;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+
+ active_src = (long) cookie;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "removed %s on %s",
+ expunge_local->loc.path,
+ priv->children[active_src]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "removing %s on %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ }
+
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->setattr,
+ &expunge_sh->parent_loc,
+ &expunge_sh->parentbuf,
+ valid, NULL);
+
+ return 0;
}
int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
- priv = this->private;
- expunge_local = expunge_frame->local;
+ priv = this->private;
+ expunge_local = expunge_frame->local;
- gf_log (this->name, GF_LOG_TRACE,
- "removing directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "expunging file %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc);
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->unlink,
+ &expunge_local->loc, 0, NULL);
- return 0;
+ return 0;
}
+
int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "expunging directory %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->rmdir,
+ &expunge_local->loc, 1, NULL);
+
+ return 0;
}
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct stat *buf)
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int type = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- source = expunge_sh->source;
-
- type = (buf->st_mode & S_IFMT);
-
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- case S_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
-
- break;
- case S_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[source]->name, type);
- goto out;
- break;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int type = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ local = frame->local;
+ sh = &local->self_heal;
+ loc = &expunge_local->loc;
+
+ type = buf->ia_type;
+ if (loc->parent && uuid_is_null (loc->parent->gfid))
+ uuid_copy (loc->pargfid, parentbuf->ia_gfid);
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ expunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ goto out;
+ break;
+ }
+
+ return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, -1, EINVAL);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *x,
+ struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int active_src = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ active_src = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "lookup of %s on %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
+ postparent);
+
+ return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->lookup,
+ &expunge_local->loc, NULL);
+
+ return 0;
+}
int
afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *x,
+ struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
-
- if (op_ret == -1 && op_errno == ENOENT) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
-
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int source = 0;
+ call_frame_t *frame = NULL;
+ int active_src = 0;
+ int need_expunge = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ active_src = expunge_sh->active_source;
+ source = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret == -1 && op_errno == ENOENT)
+ need_expunge = 1;
+ else if (op_ret == -1)
+ goto out;
+
+ if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
+ !uuid_is_null (buf->ia_gfid) &&
+ (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
+ char uuidbuf1[64];
+ char uuidbuf2[64];
+ gf_log (this->name, GF_LOG_DEBUG,
+ "entry %s found on %s with mismatching gfid (%s/%s)",
+ expunge_local->loc.path,
+ priv->children[source]->name,
+ uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
+ uuid_utoa_r (buf->ia_gfid, uuidbuf2));
+ need_expunge = 1;
+ }
+
+ if (need_expunge) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Entry %s is missing on %s and deleting from "
+ "replica's other bricks",
+ expunge_local->loc.path,
+ priv->children[source]->name);
+
+ if (postparent)
+ expunge_sh->parentbuf = *postparent;
+
+ afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
+
+ return 0;
+ }
+
+out:
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s exists under %s",
+ expunge_local->loc.path,
+ priv->children[source]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "looking up %s under %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[source]->name,
+ strerror (op_errno));
+ }
+
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
+
+ return 0;
}
+static gf_boolean_t
+can_skip_entry_self_heal (char *name, loc_t *parent_loc)
+{
+ if (strcmp (name, ".") == 0) {
+ return _gf_true;
+ } else if (strcmp (name, "..") == 0) {
+ return _gf_true;
+ } else if (loc_is_root (parent_loc) &&
+ (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
int
afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- char *name)
+ gf_dirent_t *entry)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)) {
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
-
- ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
- if (ret != 0) {
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
-
- ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = -1;
+ call_frame_t *expunge_frame = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int active_src = 0;
+ int source = 0;
+ int op_errno = 0;
+ char *name = NULL;
+ int op_ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ active_src = sh->active_source;
+ source = sh->source;
+ sh->expunge_done = afr_sh_entry_expunge_entry_done;
+
+ name = entry->d_name;
+ if (can_skip_entry_self_heal (name, &local->loc)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "inspecting existence of %s under %s",
+ name, local->loc.path);
+
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
+
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ expunge_sh->active_source = active_src;
+ expunge_sh->entrybuf = entry->d_stat;
+ loc_copy (&expunge_sh->parent_loc, &local->loc);
+
+ ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
+ name);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s", expunge_local->loc.path,
+ priv->children[source]->name);
+
+ STACK_WIND_COOKIE (expunge_frame,
+ afr_sh_entry_expunge_entry_cbk,
+ (void *) (long) source,
+ priv->children[source],
+ priv->children[source]->fops->lookup,
+ &expunge_local->loc, NULL);
+
+ ret = 0;
out:
- if (ret == -1)
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ if (ret == -1)
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry->d_name);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ gf_dirent_t *entry = NULL;
+ off_t last_offset = 0;
+ int active_src = 0;
+ int entry_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ active_src = sh->active_source;
+
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readdir of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir of %s on subvolume %s complete",
+ local->loc.path,
+ priv->children[active_src]->name);
+ }
+
+ afr_sh_entry_expunge_all (frame, this);
+ return 0;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ entry_count++;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir'ed %d entries from %s",
+ entry_count, priv->children[active_src]->name);
+
+ sh->offset = last_offset;
+ local->call_count = entry_count;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ afr_sh_entry_expunge_entry (frame, this, entry);
+ }
+
+ return 0;
}
int
afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdir,
- sh->healing_fd, sh->block_size, sh->offset);
+ STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readdirp,
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int active_src = -1;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- sh->offset = 0;
+ sh->offset = 0;
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sources for %s to expunge entries",
+ local->loc.path);
+ goto out;
+ }
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
+ active_src = next_active_sink (frame, this, sh->active_source);
+ sh->active_source = active_src;
- if (sh->op_failed) {
- goto out;
- }
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ goto out;
+ }
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
+ if (active_src == -1) {
+ /* completed creating missing files on all subvolumes */
+ goto out;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "expunging entries of %s on %s to other sinks",
+ local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+ afr_sh_entry_expunge_subvol (frame, this, active_src);
- return 0;
+ return 0;
out:
- afr_sh_entry_erase_pending (frame, this);
- return 0;
+ afr_sh_entry_impunge_all (frame, this);
+ return 0;
}
int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int32_t op_ret, int32_t op_errno)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ sh->entries_skipped = _gf_true;
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_sh_entry_impunge_subvol (frame, this);
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+void
+afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
- return 0;
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
}
-
int
-afr_sh_entry_impunge_utimens_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "utimes set for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting utimes of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ int child_index = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ child_index = (long) cookie;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setattr done for %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr (%s) on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ 0, op_errno);
+ }
+
+ return 0;
+}
int
-afr_sh_entry_impunge_chown_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- struct timespec ts[2];
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "ownership of %s on %s changed",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ownership of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = impunge_local->cont.lookup.buf.st_atim;
- ts[1] = impunge_local->cont.lookup.buf.st_mtim;
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = impunge_local->cont.lookup.buf.st_atimespec;
- ts[1] = impunge_local->cont.lookup.buf.st_mtimespec;
-#else
- ts[0].tv_sec = impunge_local->cont.lookup.buf.st_atime;
- ts[1].tv_sec = impunge_local->cont.lookup.buf.st_mtime;
-#endif
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_utimens_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->utimens,
- &impunge_local->loc, ts);
+ int call_count = 0;
+ afr_local_t *setattr_local = NULL;
+
+ setattr_local = setattr_frame->local;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr on parent directory (%s) failed: %s",
+ setattr_local->loc.path, strerror (op_errno));
+ }
- return 0;
+ call_count = afr_frame_return (setattr_frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (setattr_frame);
+ return 0;
+}
+int
+afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *setattr_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *setattr_frame = NULL;
+ int32_t valid = 0;
+ int32_t op_errno = 0;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting ownership of %s on %s to %d/%d",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ impunge_sh->entrybuf.ia_uid,
+ impunge_sh->entrybuf.ia_gid);
+
+ setattr_frame = copy_frame (impunge_frame);
+ if (!setattr_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
+ setattr_local = setattr_frame->local;
+ call_count = afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0);
+ loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
+ impunge_local->call_count = call_count;
+ setattr_local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (impunge_sh->child_errno[i])
+ continue;
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (setattr_frame,
+ afr_sh_entry_impunge_parent_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &setattr_local->loc,
+ &impunge_sh->parentbuf, valid, NULL);
+
+ valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &impunge_local->loc,
+ &impunge_sh->entrybuf, valid, NULL);
+ call_count--;
+ }
+ GF_ASSERT (!call_count);
+ return 0;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ if (setattr_frame)
+ AFR_STACK_DESTROY (setattr_frame);
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
+ return 0;
}
-
int
afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int child_index = 0;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.st_uid,
- impunge_local->cont.lookup.buf.st_gid);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_chown_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->chown,
- &impunge_local->loc,
- impunge_local->cont.lookup.buf.st_uid,
- impunge_local->cont.lookup.buf.st_gid);
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ int child_index = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to perform xattrop on %s (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- int pending_array[3] = {0, };
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ int active_src = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ afr_prepare_new_entry_pending_matrix (impunge_local->pending,
+ afr_is_errno_unset,
+ impunge_sh->child_errno,
+ &impunge_sh->entrybuf,
+ priv->child_count);
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
+ afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
+ LOCAL_LAST);
- child_index = (long) cookie;
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->xattrop,
+ &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ return 0;
+}
- inode->st_mode = stbuf->st_mode;
+int
+afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ impunge_sh->child_errno[child_index] = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ } else {
+ impunge_sh->child_errno[child_index] = 0;
+ }
- xattr = get_new_dict ();
- dict_ref (xattr);
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ if (!afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0)) {
+ // new_file creation failed every where
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ goto out;
+ }
+ afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
+ }
+out:
+ return 0;
+}
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (S_ISDIR (stbuf->st_mode))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
+int
+afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ //For symlinks impunge is attempted un-conditionally
+ //So the file can already exist.
+ if ((op_ret < 0) && (op_errno == EEXIST))
+ op_ret = 0;
+ }
- ret = dict_set_static_bin (xattr, priv->pending_key[child_index],
- pending_array, sizeof (pending_array));
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
+ return 0;
+}
- dict_unref (xattr);
+int
+afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ loc_t *loc = NULL;
+ struct iatt *buf = NULL;
+ loc_t oldloc = {0};
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc = &impunge_local->loc;
+ buf = &impunge_sh->entrybuf;
+
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, buf->ia_gfid);
+ gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
+ loc->path, priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->link,
+ &oldloc, loc, NULL);
+ loc_wipe (&oldloc);
- return 0;
+ return 0;
+}
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+int
+afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret < 0) {
+ afr_sh_entry_impunge_create_file (impunge_frame, this,
+ (long)cookie);
+ } else {
+ afr_sh_entry_impunge_hardlink (impunge_frame, this,
+ (long)cookie);
+ }
+ return 0;
}
+int
+afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
+ xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ loc_t *loc = NULL;
+ dict_t *xattr_req = NULL;
+ loc_t oldloc = {0};
+ int ret = -1;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ loc = &impunge_local->loc;
+
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ goto out;
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, stbuf->ia_gfid);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lookup,
+ &oldloc, xattr_req);
+ ret = 0;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+ loc_wipe (&oldloc);
+ if (ret)
+ sh->impunge_done (frame, this, -1, ENOMEM);
+ return 0;
+}
int
afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
+ int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing file %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+
+ dict = dict_new ();
+ if (!dict)
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
+
+ /*
+ * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY :
+ *
+ * Problem:
+ * While a brick is down in a replica pair, lets say the user creates
+ * one file(file-A) and a hard link to that file(h-file-A). After the
+ * brick comes back up, entry self-heal is attempted on parent dir of
+ * these two files. As part of readdir in self-heal it reads both the
+ * entries file-A and h-file-A for both of them it does name less lookup
+ * to check if there are any hardlinks already present in the
+ * destination brick. It finds that there are no hard links already
+ * present for files file-A, h-file-A. Self-heal does mknods for both
+ * file-A and h-file-A. This leads to file-A and h-file-A not being
+ * hardlinks anymore.
+ *
+ * Fix: (More like shrinking of race-window, the race itself is still
+ * present in posix-mknod).
+ * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then
+ * posix_mknod checks if there are already any gfid-links and does
+ * link() instead of mknod. There still can be a race where two
+ * posix_mknods same gfid see that
+ * gfid-link file is not present and proceeds with mknods and result in
+ * two different files with same gfid.
+ */
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+ impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->mknod,
+ &impunge_local->loc,
+ st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
+ makedev (ia_major (stbuf->ia_rdev),
+ ia_minor (stbuf->ia_rdev)), 0, dict);
+
+ if (dict)
+ dict_unref (dict);
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- stbuf->st_mode, stbuf->st_rdev);
-
- return 0;
+ return 0;
}
int
afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
+ int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ priv = this->private;
+ impunge_local = impunge_frame->local;
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ return 0;
+ }
+
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing directory %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->mkdir,
+ &impunge_local->loc,
+ st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
+ 0, dict);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc, stbuf->st_mode);
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
int
afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
+ int child_index, const char *linkname)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc);
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ struct iatt *buf = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ buf = &impunge_local->cont.dir_fop.buf;
+
+ dict = dict_new ();
+ if (!dict) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, ENOMEM);
+ goto out;
+ }
- return 0;
+ GF_ASSERT (!uuid_is_null (buf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, buf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: dict set gfid failed",
+ impunge_local->loc.path);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing symlink %s -> %s on %s",
+ impunge_local->loc.path, linkname,
+ priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->symlink,
+ linkname, &impunge_local->loc, 0, dict);
+
+ if (dict)
+ dict_unref (dict);
+out:
+ return 0;
}
int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname)
+afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- call_frame_t *frame = NULL;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- linkname);
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ goto out;
+ }
+ afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
+ impunge_sh->linkname);
+
+ return 0;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
+ return 0;
}
int
-afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf,
- dict_t *xattr)
+afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int type = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int call_count = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- active_src = impunge_sh->active_source;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s (for %s) failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_local->cont.lookup.buf = *buf;
- type = (buf->st_mode & S_IFMT);
-
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
- case S_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- case S_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlinking symlink %s with wrong target on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->unlink,
+ &impunge_local->loc, 0, NULL);
+
+ return 0;
}
int
-afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ child_index = (long) cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+ /* symlink doesn't exist on the sink */
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ afr_sh_entry_impunge_symlink (impunge_frame, this,
+ child_index, impunge_sh->linkname);
+ return 0;
+ }
+
+
+ /* symlink exists on the sink, so check if targets match */
+
+ if (strcmp (linkname, impunge_sh->linkname) == 0) {
+ /* targets match, nothing to do */
- active_src = impunge_sh->active_source;
+ goto out;
+ } else {
+ /*
+ * Hah! Sneaky wolf in sheep's clothing!
+ */
+ afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
+ child_index);
+ return 0;
+ }
+
+out:
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_recreate_lookup_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &impunge_local->loc, 0);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- return 0;
+ return 0;
}
int
-afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int call_count = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
- active_src = impunge_sh->active_source;
-
- if (op_ret == -1 && op_errno == ENOENT) {
- /* decrease call_count in recreate-callback */
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- afr_sh_entry_impunge_recreate (impunge_frame, this,
- child_index);
- return 0;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "checking symlink target of %s on %s",
+ impunge_local->loc.path, priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readlink,
+ &impunge_local->loc, 4096, NULL);
+
+ return 0;
}
int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- char *name)
+afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int i = 0;
- int call_count = 0;
- int op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)) {
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- impunge_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_src;
-
- ret = build_child_loc (this, &impunge_local->loc, &local->loc, name);
- if (ret != 0) {
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
- call_count++;
- }
-
- impunge_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", impunge_local->loc.path,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_entry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &impunge_local->loc, 0);
-
- if (!--call_count)
- break;
- }
-
- ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ impunge_sh->linkname = gf_strdup (linkname);
+ afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
+
+ return 0;
+
out:
- if (ret == -1)
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
-
- return 0;
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
+
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+
+ return 0;
}
int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry->d_name);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+ impunge_local->cont.dir_fop.buf = *stbuf;
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
+ (void *) (long) child_index,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readlink,
+ &impunge_local->loc, 4096, NULL);
+
+ return 0;
}
-
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdir,
- sh->healing_fd, sh->block_size, sh->offset);
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
+ &impunge_sh->parentbuf);
+
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_impunge_mkdir (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ impunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ afr_sh_entry_impunge_mknod (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFLNK:
+ afr_sh_entry_impunge_readlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ impunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
+ }
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ return 0;
+}
- sh->offset = 0;
+gf_boolean_t
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
+ unsigned int child_count)
+{
+ gf_boolean_t recreate = _gf_false;
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
+ GF_ASSERT (impunge_sh->child_errno);
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
+ if (child == impunge_sh->active_source)
+ goto out;
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ recreate = _gf_true;
+ goto out;
+ }
+
+ if (impunge_sh->child_errno[child] == ENOENT)
+ recreate = _gf_true;
+out:
+ return recreate;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+unsigned int
+afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
+ unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ for (i = 0; i < child_count; i++) {
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
+ count++;
+ }
- return 0;
+ return count;
}
-
int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_impunge_all (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int recreate_count = 0;
+ int i = 0;
+ int active_src = 0;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ impunge_sh->entrybuf = impunge_sh->buf[active_src];
+ impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
+ recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
+ priv->child_count);
+ if (!recreate_count) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
+ goto out;
+ }
+ impunge_local->call_count = recreate_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
+ continue;
+ (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
+ recreate_count--;
+ }
+ GF_ASSERT (!recreate_count);
+out:
+ return 0;
}
+void
+afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int gfid_miss_count = 0;
+ unsigned int children_up_count = 0;
+ uuid_t gfid = {0};
+ int active_src = 0;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+
+ if (op_ret < 0)
+ goto done;
+ if (impunge_sh->child_errno[active_src]) {
+ op_ret = -1;
+ op_errno = impunge_sh->child_errno[active_src];
+ goto done;
+ }
+
+ gfid_miss_count = afr_gfid_missing_count (this->name,
+ impunge_sh->success_children,
+ impunge_sh->buf, priv->child_count,
+ impunge_local->loc.path);
+ children_up_count = afr_up_children_count (impunge_local->child_up,
+ priv->child_count);
+ if ((gfid_miss_count == children_up_count) &&
+ (children_up_count < priv->child_count)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
+ "gfid should not be assigned in this state for %s",
+ impunge_local->loc.path);
+ goto done;
+ }
+
+ if (gfid_miss_count) {
+ afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
+ impunge_sh->success_children,
+ priv->child_count);
+ if (uuid_is_null (gfid)) {
+ sh->entries_skipped = _gf_true;
+ gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
+ "self-heal because of gfid absence",
+ impunge_local->loc.path);
+ goto done;
+ }
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, gfid,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ } else {
+ afr_sh_entry_call_impunge_recreate (impunge_frame, this);
+ }
+ return;
+done:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+ return;
+}
int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entry)
{
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int ret = -1;
+ call_frame_t *impunge_frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ int active_src = 0;
+ int op_errno = 0;
+ int op_ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
- int source = -1;
- int *sources = NULL;
+ active_src = sh->active_source;
+ sh->impunge_done = afr_sh_entry_impunge_entry_done;
- fd_t *fd = NULL;
+ if (can_skip_entry_self_heal (entry->d_name, &local->loc)) {
+ op_ret = 0;
+ goto out;
+ }
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
+ gf_log (this->name, GF_LOG_TRACE,
+ "inspecting existence of %s under %s",
+ entry->d_name, local->loc.path);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ ret = afr_impunge_frame_create (frame, this, active_src,
+ &impunge_frame);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
- source = local->self_heal.source;
- sources = local->self_heal.sources;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
+ entry->d_name);
+ loc_copy (&impunge_sh->parent_loc, &local->loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS, NULL);
+
+ op_ret = 0;
+out:
+ if (ret) {
+ if (impunge_frame)
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+ }
- sh->block_size = 131072;
- sh->offset = 0;
+ return 0;
+}
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
- local->call_count = call_count;
+int
+afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ gf_dirent_t *entry = NULL;
+ off_t last_offset = 0;
+ int active_src = 0;
+ int entry_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
+ active_src = sh->active_source;
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readdir of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir of %s on subvolume %s complete",
+ local->loc.path,
+ priv->children[active_src]->name);
+ }
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd);
- call_count--;
- }
+ afr_sh_entry_impunge_all (frame, this);
+ return 0;
+ }
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ entry_count++;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir'ed %d entries from %s",
+ entry_count, priv->children[active_src]->name);
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd);
+ sh->offset = last_offset;
+ local->call_count = entry_count;
- if (!--call_count)
- break;
- }
+ list_for_each_entry (entry, &entries->list, list) {
+ afr_sh_entry_impunge_entry (frame, this, entry);
+ }
- return 0;
+ return 0;
}
int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- if (source != -1)
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- sh->active_sinks = active_sinks;
-
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- active_sinks);
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int32_t active_src = 0;
- afr_sh_entry_open (frame, this);
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ active_src = sh->active_source;
+ gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
+ local->loc.path, sh->offset);
+
+ STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readdirp,
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
- return 0;
+ return 0;
}
int
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int active_src = -1;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
+ sh->offset = 0;
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+ active_src = next_active_source (frame, this, sh->active_source);
+ sh->active_source = active_src;
- afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_ENTRY);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+ if (active_src == -1) {
+ /* completed creating missing files on all subvolumes */
+ afr_sh_entry_erase_pending (frame, this);
+ return 0;
+ }
- source = afr_sh_select_source (sh->sources, priv->child_count);
- sh->source = source;
+ gf_log (this->name, GF_LOG_TRACE,
+ "impunging entries of %s on %s to other sinks",
+ local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_sync_prepare (frame, this);
+ afr_sh_entry_impunge_subvol (frame, this);
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ /* TODO: some of the open's might fail.
+ In that case, modify cleanup fn to send flush on those
+ fd's which are already open */
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir of %s failed on child %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ }
+ UNLOCK (&frame->lock);
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "fd for %s opened, commencing sync",
+ local->loc.path);
- if (call_count == 0) {
- afr_sh_entry_fix (frame, this);
- }
+ sh->active_source = -1;
+ afr_sh_entry_expunge_all (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
{
- afr_self_heal_t * sh = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = local->child_count;
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- }
+ int i = 0;
+ int call_count = 0;
+
+ int source = -1;
+ int *sources = NULL;
+
+ fd_t *fd = NULL;
+
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = local->self_heal.source;
+ sources = local->self_heal.sources;
+
+ sh->block_size = priv->sh_readdir_size;
+ sh->offset = 0;
+
+ call_count = sh->active_sinks;
+ if (source != -1)
+ call_count++;
+
+ local->call_count = call_count;
+
+ fd = fd_create (local->loc.inode, frame->root->pid);
+ sh->healing_fd = fd;
+
+ if (source != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "opening directory %s on subvolume %s (source)",
+ local->loc.path, priv->children[source]->name);
+
+ /* open source */
+ STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+ (void *) (long) source,
+ priv->children[source],
+ priv->children[source]->fops->opendir,
+ &local->loc, fd, NULL);
+ call_count--;
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame,
- afr_sh_entry_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
+ /* open sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] || !local->child_up[i])
+ continue;
+ gf_log (this->name, GF_LOG_TRACE,
+ "opening directory %s on subvolume %s (sink)",
+ local->loc.path, priv->children[i]->name);
+ STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, fd, NULL);
-int
-afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- afr_sh_entry_lookup (frame, this);
- }
-
- return 0;
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
}
int
-afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ source = sh->source;
- call_count = local->child_count;
+ afr_sh_mark_source_sinks (frame, this);
+ if (source != -1)
+ sh->success[source] = 1;
- local->call_count = call_count;
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no active sinks for self-heal on dir %s",
+ local->loc.path);
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
+ if (source == -1 && sh->active_sinks < 2) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "cannot sync with 0 sources and 1 sink on dir %s",
+ local->loc.path);
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ if (source != -1)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing directory %s from subvolume %s to "
+ "%d other",
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sources for %s found. "
+ "merging all entries as a conservative decision",
+ local->loc.path);
- STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_entry_open (frame, this);
- return 0;
+ return 0;
}
-int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_entry_finish (frame, this);
+ goto out;
+ }
+
+ if (sh->forced_merge) {
+ sh->source = -1;
+ goto heal;
+ }
+
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_entry_finish (frame, this);
+ return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ }
+ sh->source = source;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ if (sh->source >= 0)
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
- if (local->need_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entry_lock (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
+heal:
+ afr_sh_entry_sync_prepare (frame, this);
+out:
+ return;
+}
+
+int
+afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
+ "failed for %s.", local->loc.path);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_entry_done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
+ "for %s. Proceeding to FOP", local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_entry_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ }
- return 0;
+ return 0;
}
+int
+afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
+
+ if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_sh_entrylk (frame, this, &local->loc, NULL,
+ afr_sh_post_nonblocking_entry_cbk);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to completion on %s",
+ local->loc.path);
+ afr_sh_entry_done (frame, this);
+ }
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 966b754b3..fd5da6cfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -52,755 +43,728 @@
int
afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_DEBUG,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- if (S_ISREG (local->cont.lookup.buf.st_mode)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (S_ISDIR (local->cont.lookup.buf.st_mode)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "completed self heal of %s",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
+ afr_sh_reset (frame, this);
+ if (IA_ISDIR (sh->type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to entry check on %s",
+ local->loc.path);
+ afr_self_heal_entry (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to data check on %s",
+ local->loc.path);
+ afr_self_heal_data (frame, this);
+ }
+ return 0;
+}
- local = frame->local;
+int
+afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ int_lock = &local->internal_lock;
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
+ int_lock->lock_cbk = afr_sh_metadata_done;
+ afr_unlock (frame, this);
- return 0;
+ return 0;
}
-
int
afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
-
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND (frame, afr_sh_metadata_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_sh_inode_unlock (frame, this);
+ return 0;
+}
int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_metadata_finish (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ long i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
+ }
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_metadata_finish (frame, this);
+ if (call_count == 0) {
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
+ afr_sh_metadata_finish (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
-
- return 0;
+ afr_sh_erase_pending (frame, this, AFR_METADATA_TRANSACTION,
+ afr_sh_metadata_erase_pending_cbk,
+ afr_sh_metadata_finish);
+ return 0;
}
int
afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- child_index = (long) cookie;
+ child_index = (long) cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setting attributes failed for %s on %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
- sh->success[child_index] = 0;
- }
- }
- UNLOCK (&frame->lock);
+ sh->success[child_index] = 0;
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
+ if (call_count == 0) {
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
+ afr_sh_metadata_erase_pending (frame, this);
+ }
- return 0;
+ return 0;
}
int
-afr_sh_metadata_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
int
afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+afr_sh_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
- struct timespec ts[2];
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 4 calls per sink - chown, chmod, utimes, setxattr
- */
- if (xattr)
- call_count = active_sinks * 4;
- else
- call_count = active_sinks * 3;
-
- local->call_count = call_count;
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = sh->buf[source].st_atim;
- ts[1] = sh->buf[source].st_mtim;
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = sh->buf[source].st_atimespec;
- ts[1] = sh->buf[source].st_mtimespec;
-#else
- ts[0].tv_sec = sh->buf[source].st_atime;
- ts[1].tv_sec = sh->buf[source].st_mtime;
-#endif
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chown,
- &local->loc,
- sh->buf[source].st_uid,
- sh->buf[source].st_gid);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chmod,
- &local->loc, sh->buf[source].st_mode);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc, ts);
-
- call_count = call_count - 3;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
- call_count--;
- }
-
- return 0;
+ i = (long) cookie;
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc, local->xattr_req, 0, NULL);
+
+ out:
+ return 0;
+}
+
+inline void
+afr_prune_special_keys (dict_t *xattr_dict)
+{
+ dict_del (xattr_dict, GF_SELINUX_XATTR_KEY);
}
+inline void
+afr_prune_pending_keys (dict_t *xattr_dict, afr_private_t *priv)
+{
+ int i = 0;
+
+ for (; i < priv->child_count; i++) {
+ dict_del (xattr_dict, priv->pending_key[i]);
+ }
+}
int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+afr_sh_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- int i;
+ priv = this->private;
+ local = frame->local;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
- source = sh->source;
+ afr_prune_pending_keys (xattr, priv);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
+ afr_prune_special_keys (xattr);
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
+ i = (long) cookie;
- afr_sh_metadata_sync (frame, this, xattr);
- }
+ /* send removexattr in bulk via xdata */
+ STACK_WIND_COOKIE (frame, afr_sh_removexattr_cbk,
+ cookie,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc, "", xattr);
- return 0;
+ out:
+ return 0;
}
-
int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
- sh->active_sinks = active_sinks;
-
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
-
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ int active_sinks = 0;
+ int call_count = 0;
+ int i = 0;
+
+ struct iatt stbuf = {0,};
+ int32_t valid = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+ active_sinks = sh->active_sinks;
+
+ /*
+ * 2 calls per sink - setattr, setxattr
+ */
+ if (xattr) {
+ call_count = active_sinks * 2;
+ local->xattr_req = dict_ref (xattr);
+ } else
+ call_count = active_sinks;
+
+ local->call_count = call_count;
+
+ stbuf.ia_atime = sh->buf[source].ia_atime;
+ stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
+ stbuf.ia_mtime = sh->buf[source].ia_mtime;
+ stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
+
+ stbuf.ia_uid = sh->buf[source].ia_uid;
+ stbuf.ia_gid = sh->buf[source].ia_gid;
+
+ stbuf.ia_type = sh->buf[source].ia_type;
+ stbuf.ia_prot = sh->buf[source].ia_prot;
+
+ valid = GF_SET_ATTR_MODE |
+ GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (call_count == 0) {
+ break;
+ }
+ if (sh->sources[i] || !local->child_up[i])
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing metadata of %s from %s to %s",
+ local->loc.path, priv->children[source]->name,
+ priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc, &stbuf, valid, NULL);
+
+ call_count--;
+
+ if (!xattr)
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_sh_getxattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->getxattr,
+ &local->loc, NULL, NULL);
+ call_count--;
+ }
+
+ return 0;
}
int
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count,
- AFR_METADATA_TRANSACTION);
+ source = sh->source;
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
+ local->loc.path, priv->children[source]->name,
+ strerror (op_errno));
+
+ afr_sh_metadata_sync (frame, this, NULL);
+ } else {
+ afr_prune_pending_keys (xattr, priv);
+ afr_sh_metadata_sync (frame, this, xattr);
+ }
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_METADATA);
+ return 0;
+}
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+static void
+afr_set_metadata_sh_info_str (afr_local_t *local, afr_self_heal_t *sh,
+ xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *string = NULL;
+ size_t off = 0;
+ char *source_child = " from source %s to";
+ char *format = " %s, ";
+ char *string_msg = " metadata self heal";
+ char *pending_matrix_str = NULL;
+ int down_child_present = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
+ int down_count = 0;
+ int unknown_count = 0;
+
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ len += snprintf (num, sizeof (num), "%s", string_msg);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), source_child,
+ priv->children[i]->name);
+ } else if ((local->child_up[i] == 1) && (sh->sources[i] == 0)) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
+ }
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
+ if (down_child_present) {
+ if (down_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ len ++;
+
+ string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+ if (!string)
+ return;
+ off += snprintf (string + off, len - off, "%s", string_msg);
+ for (i=0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1))
+ off += snprintf (string + off, len - off, source_child,
+ priv->children[i]->name);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((local->child_up[i] == 1)&& (sh->sources[i] == 0))
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ gf_asprintf (&sh->metadata_sh_info, "%s metadata %s,", string,
+ pending_matrix_str);
+
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (string && strcmp (string, ""))
+ GF_FREE (string);
+}
+
+int
+afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sinks for performing self-heal on file %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
return 0;
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "syncing metadata of %s from subvolume %s to %d active sinks",
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_set_metadata_sh_info_str (local, sh, this);
+ STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
+ priv->children[source],
+ priv->children[source]->fops->getxattr,
+ &local->loc, NULL, NULL);
- sh->sources[priv->favorite_child] = 1;
+ return 0;
+}
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+void
+afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int source = 0;
+ int i = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
+ if ((nsources == -1)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->sources[priv->favorite_child] = 1;
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
+ }
- source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (nsources == -1) {
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW);
+ afr_sh_metadata_fail (frame, this);
+ goto out;
+ }
+
+ afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+
+ source = afr_sh_select_source (sh->sources, priv->child_count);
if (source == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"No active sources found.");
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
- sh->source = source;
+ sh->source = source;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || sh->child_errno[i])
+ continue;
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
+ if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
+ if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
+ }
- afr_sh_metadata_sync_prepare (frame, this);
+ if ((!IA_ISREG (sh->buf[source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[source].ia_type))) {
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
- return 0;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
+out:
+ return;
}
-
int
-afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
+ xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->st_mode);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_fix (frame, this);
-
- return 0;
-}
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks failed for %s.", local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
+ "failed for %s.", local->loc.path);
+ afr_sh_metadata_done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks done for %s. Proceeding to FOP",
+ local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_metadata_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ }
+ return 0;
+}
int
-afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- }
- }
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
-int
-afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- afr_sh_metadata_lookup (frame, this);
- }
-
- return 0;
-}
+ afr_set_lock_number (frame, this);
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
+ int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
-int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
-
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_nonblocking_inodelk (frame, this);
+
+ return 0;
}
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (local->need_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ afr_self_heal_t *sh = &local->self_heal;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
+
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_sh_metadata_lock (frame, this);
+ } else {
+ afr_sh_metadata_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 73abd8fb9..7c9bc8111 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_H__
@@ -22,19 +13,12 @@
#include <sys/stat.h>
-#define FILETYPE_DIFFERS(buf1,buf2) ((S_IFMT & ((struct stat *)buf1)->st_mode) != (S_IFMT & ((struct stat *)buf2)->st_mode))
-#define PERMISSION_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_mode) != (((struct stat *)buf2)->st_mode))
-#define OWNERSHIP_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_uid) != (((struct stat *)buf2)->st_uid) || (((struct stat *)buf1)->st_gid != (((struct stat *)buf2)->st_gid)))
-#define SIZE_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_size) != (((struct stat *)buf2)->st_size))
-
-#define SIZE_GREATER(buf1,buf2) ((((struct stat *)buf1)->st_size > (((struct stat *)buf2)->st_size)))
+#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
+#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
+#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
+#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
-int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this);
+#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
@@ -46,7 +30,14 @@ int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this,
- int (*completion_cbk) (call_frame_t *, xlator_t *));
+afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+int
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid);
#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 000000000..1b48a1bca
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,1787 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "afr.h"
+#include "syncop.h"
+#include "afr-self-heald.h"
+#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
+
+typedef enum {
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+typedef enum {
+ HEAL = 1,
+ INFO,
+ STATISTICS_TO_BE_HEALED,
+} shd_crawl_op;
+
+typedef struct shd_dump {
+ dict_t *dict;
+ xlator_t *this;
+ int child;
+} shd_dump_t;
+
+typedef struct shd_event_ {
+ int child;
+ char *path;
+} shd_event_t;
+
+typedef struct shd_pos_ {
+ int child;
+ xlator_t *this;
+ afr_child_pos_t pos;
+} shd_pos_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/* For calling straight through (e.g. already in a synctask). */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos);
+
+/* For deferring through a new synctask. */
+int
+afr_syncop_find_child_position (void *data);
+
+static int
+_loc_assign_gfid_path (loc_t *loc)
+{
+ int ret = -1;
+ char gfid_path[64] = {0};
+
+ if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
+ ret = inode_path (loc->inode, NULL, (char**)&loc->path);
+ } else if (!uuid_is_null (loc->gfid)) {
+ snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
+ uuid_utoa (loc->gfid));
+ loc->path = gf_strdup (gfid_path);
+ if (loc->path)
+ ret = 0;
+ }
+ return ret;
+}
+
+void
+_destroy_crawl_event_data (void *data)
+{
+ shd_crawl_event_t *crawl_event = NULL;
+
+ if (!data)
+ goto out;
+
+ crawl_event = (shd_crawl_event_t *)data;
+ GF_FREE (crawl_event->start_time_str);
+ GF_FREE (crawl_event->end_time_str);
+
+out:
+ return;
+}
+
+void
+_destroy_shd_event_data (void *data)
+{
+ shd_event_t *event = NULL;
+ if (!data)
+ goto out;
+ event = (shd_event_t*)data;
+ GF_FREE (event->path);
+out:
+ return;
+}
+void
+shd_cleanup_event (void *event)
+{
+ shd_event_t *shd_event = event;
+
+ if (!shd_event)
+ goto out;
+ GF_FREE (shd_event->path);
+ GF_FREE (shd_event);
+out:
+ return;
+}
+
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+ int i = 0;
+ int ret = -1;
+ for (i = 0; i < child_count; i++) {
+ if (shd->pos[i] == AFR_POS_LOCAL) {
+ ret = i;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+ int ret = 0;
+
+ uuid_copy (loc->pargfid, parent->inode->gfid);
+ loc->path = "";
+ loc->name = name;
+ loc->parent = inode_ref (parent->inode);
+ if (!loc->parent) {
+ loc->path = NULL;
+ loc_wipe (loc);
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child,
+ shd_crawl_event_t *shd_event, struct timeval *tv)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ healed_count = shd_event->healed_count;
+ split_brain_count = shd_event->split_brain_count;
+ heal_failed_count = shd_event->heal_failed_count;
+ start_time_str = shd_event->start_time_str;
+ end_time_str = shd_event->end_time_str;
+ crawl_type = shd_event->crawl_type;
+
+ if (!start_time_str) {
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "split_brain_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup (crawl_type));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_type to output");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_failed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup(start_time_str));
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_start_time to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (!end_time_str)
+ end_time_str = "Could not determine the end time";
+ ret = dict_set_dynstr (output, key, gf_strdup(end_time_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_end_time to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (shd_event->crawl_inprogress == _gf_true)
+ progress = 1;
+ else
+ progress = 0;
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "inprogress to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment the "
+ "counter.");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv, gf_boolean_t dyn)
+{
+ //subkey not used for now
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ if (dyn)
+ ret = dict_set_dynstr (output, key, path);
+ else
+ ret = dict_set_str (output, key, path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ path);
+ goto out;
+ }
+
+ if (!tv)
+ goto inc_count;
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+ child, count);
+ ret = dict_set_uint32 (output, key, tv->tv_sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
+ path);
+ goto out;
+ }
+
+inc_count:
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+ char **fpath, gf_boolean_t *missing)
+{
+ dict_t *xattr = NULL;
+ char *path = NULL;
+ int ret = -1;
+
+ ret = syncop_getxattr (readdir_xl, child, &xattr, GFID_TO_PATH_KEY);
+ if (ret < 0) {
+ if ((errno == ENOENT) && missing)
+ *missing = _gf_true;
+ goto out;
+ }
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
+ "gfid %s", uuid_utoa (child->gfid));
+ goto out;
+ }
+ path = gf_strdup (path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *fpath = path;
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ if (shd_event->child != dump_data->child)
+ goto out;
+ ret = _add_path_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event->path, &cb->tv,
+ _gf_false);
+out:
+ return ret;
+}
+
+int
+_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_crawl_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event, &cb->tv);
+ return ret;
+}
+
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (eh, &dump_data, _add_event_to_dict);
+ return 0;
+}
+
+
+int
+_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (shd->statistics[child], &dump_data,
+ _add_crawl_event_statistics_to_dict);
+ return 0;
+
+}
+
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+ loc_t *parent, char *fname)
+{
+ int ret = 0;
+ loc_t index_loc = {0};
+
+ ret = _build_index_loc (this, &index_loc, fname, parent);
+ if (ret)
+ goto out;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
+ "for %s on %s", index_loc.name, readdir_xl->name);
+ ret = syncop_unlink (readdir_xl, &index_loc);
+ if(ret && (errno != ENOENT)) {
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index "
+ "on %s - %s",index_loc.name, readdir_xl->name,
+ strerror (errno));
+ }
+ index_loc.path = NULL;
+ loc_wipe (&index_loc);
+out:
+ return;
+}
+
+int
+_count_hard_links_under_base_indices_dir (xlator_t *this,
+ afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *childloc,
+ loc_t *parentloc, struct iatt *iattr)
+{
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+ struct iatt parent = {0};
+ int ret = 0;
+ dict_t *output = NULL;
+ int xl_id = 0;
+ char key[256] = {0};
+ int child = -1;
+ uint64_t hardlinks = 0;
+
+ output = crawl_data->op_data;
+ child = crawl_data->child;
+
+ ret = syncop_lookup (readdir_xl, childloc, NULL, iattr, NULL, &parent);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id, child);
+ ret = dict_get_uint64 (output, key, &hardlinks);
+
+ /*Removing the count of base_entry under indices/base_indicies and
+ * entry under indices/xattrop */
+ hardlinks = hardlinks + iattr->ia_nlink - 2;
+ ret = dict_set_uint64 (output, key, hardlinks);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry,
+ loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+ dict_t *output = NULL;
+ xlator_t *readdir_xl = NULL;
+ int ret = -1;
+ char *path = NULL;
+ gf_boolean_t missing = _gf_false;
+ char gfid_str[64] = {0};
+
+ if (uuid_is_null (childloc->gfid))
+ goto out;
+
+ output = crawl_data->op_data;
+ readdir_xl = crawl_data->readdir_xl;
+
+ ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path,
+ &missing);
+ if (ret == 0) {
+ ret = _add_path_to_dict (this, output, crawl_data->child, path,
+ NULL, _gf_true);
+ } else if (missing) {
+ _remove_stale_index (this, readdir_xl, parentloc,
+ uuid_utoa_r (childloc->gfid, gfid_str));
+ }
+
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return ret;
+}
+
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
+ afr_crawl_data_t *crawl_data)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ eh_t *eh = NULL;
+ char *path = NULL;
+ char gfid_str[64] = {0};
+ shd_event_t *event = NULL;
+ int32_t sh_failed = 0;
+ gf_boolean_t split_brain = 0;
+ int32_t actual_sh_done = 0;
+ shd_crawl_event_t **shd_crawl_event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (crawl_data->crawl == INDEX) {
+ if ((op_ret < 0) && (op_errno == ENOENT)) {
+ _remove_stale_index (this, crawl_data->readdir_xl,
+ parent, uuid_utoa_r (child->gfid,
+ gfid_str));
+ goto out;
+ }
+ ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+ child, &path, NULL);
+ if (ret)
+ goto out;
+ } else {
+ path = gf_strdup (child->path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (xattr_rsp) {
+ ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+ ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done);
+ }
+
+ shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events);
+
+ split_brain = afr_is_split_brain (this, child->inode);
+ if ((op_ret < 0 && op_errno == EIO) || split_brain) {
+ eh = shd->split_brain;
+ shd_crawl_event[crawl_data->child]->split_brain_count += 1;
+ } else if ((op_ret < 0) || sh_failed) {
+ eh = shd->heal_failed;
+ shd_crawl_event[crawl_data->child]->heal_failed_count += 1;
+ } else if (actual_sh_done == 1) {
+ eh = shd->healed;
+ shd_crawl_event[crawl_data->child]->healed_count += 1;
+ }
+ ret = -1;
+
+ if (eh != NULL) {
+ event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+ if (!event)
+ goto out;
+ event->child = crawl_data->child;
+ event->path = path;
+
+ ret = eh_save_history (eh, event);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
+ "to event history, (%d, %s)", path, op_ret,
+ strerror (op_errno));
+
+ goto out;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
+ path);
+
+ }
+ ret = 0;
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return;
+}
+
+int
+_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link (loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "inode link failed "
+ "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
+ goto out;
+ }
+ inode_unref (loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+ loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+ struct iatt parentbuf = {0};
+ int ret = 0;
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1);
+
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+ ret = syncop_lookup (this, child, xattr_req,
+ iattr, &xattr_rsp, &parentbuf);
+ _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
+ crawl_data);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ if (ret == 0)
+ ret = _link_inode_update_loc (this, child, iattr);
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ return ret;
+}
+
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ return 0;
+}
+
+void
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_start_crawl (this, child, crawl, _self_heal_entry,
+ NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ afr_crawl_done);
+}
+
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t proceed = _gf_false;
+ char *msg = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (!shd->enabled) {
+ msg = "Self-heal daemon is not enabled";
+ gf_log (this->name, GF_LOG_DEBUG, "%s", msg);
+ goto out;
+ }
+ if (!priv->child_up[child]) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
+ "subvol went down", priv->children[child]->name);
+ msg = "Brick is Not connected";
+ goto out;
+ }
+
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
+ "< 2 children are up");
+ msg = "< 2 bricks in replica are running";
+ goto out;
+ }
+ }
+ proceed = _gf_true;
+out:
+ if (reason)
+ *reason = msg;
+ return proceed;
+}
+
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ shd_crawl_op op, dict_t *output)
+{
+ afr_private_t *priv = NULL;
+ char *status = NULL;
+ char *subkey = NULL;
+ char key[256] = {0};
+ shd_pos_t pos_data = {0};
+ int op_ret = -1;
+ int xl_id = -1;
+ int i = 0;
+ int ret = 0;
+ int crawl_flags = 0;
+
+ priv = this->private;
+ if (op == HEAL)
+ crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+ if (output) {
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+ "translator-id is not available");
+ goto out;
+ }
+ }
+ pos_data.this = this;
+ subkey = "status";
+ for (i = 0; i < priv->child_count; i++) {
+ if (_crawl_proceed (this, i, crawl_flags, &status)) {
+ pos_data.child = i;
+ /*
+ * We're already in a synctask in this case, so we
+ * don't need to defer through a second (and in fact
+ * that can cause deadlock). Just call straight
+ * through instead.
+ */
+ ret = afr_find_child_position(pos_data.this,
+ pos_data.child,
+ &pos_data.pos);
+ if (ret) {
+ status = "Not able to find brick location";
+ } else if (pos_data.pos == AFR_POS_REMOTE) {
+ status = "brick is remote";
+ } else {
+ op_ret = 0;
+ if (op == HEAL) {
+ status = "Started self-heal";
+ _do_self_heal_on_subvol (this, i,
+ crawl);
+ } else if (output && (op == INFO)) {
+ status = "";
+ afr_start_crawl (this, i, INDEX,
+ _add_summary_to_dict,
+ output, _gf_false, 0,
+ NULL);
+ } else if (output &&
+ (op == STATISTICS_TO_BE_HEALED)) {
+ status = "";
+ afr_start_crawl (this, i,
+ INDEX_TO_BE_HEALED,
+ _count_hard_links_under_base_indices_dir,
+ output, _gf_false,
+ 0, NULL);
+ }
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
+ i, subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ if (!op_ret && (crawl == FULL))
+ break;
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i,
+ subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ }
+out:
+ return op_ret;
+}
+
+int
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+}
+
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+}
+
+void
+afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd= &priv->shd;
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_statistics_to_dict (this, dict, i);
+ }
+
+ return ;
+}
+
+static void
+reset_crawl_event (shd_crawl_event_t *crawl_event)
+{
+ crawl_event->healed_count = 0;
+ crawl_event->split_brain_count = 0;
+ crawl_event->heal_failed_count = 0;
+ GF_FREE (crawl_event->start_time_str);
+ crawl_event->start_time_str = NULL;
+ crawl_event->end_time_str = NULL;
+ crawl_event->crawl_type = NULL;
+ crawl_event->crawl_inprogress = _gf_false;
+ return;
+}
+
+static void
+afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst)
+{
+ dst->healed_count = src->healed_count;
+ dst->split_brain_count = src->split_brain_count;
+ dst->heal_failed_count = src->heal_failed_count;
+ dst->start_time_str = gf_strdup (src->start_time_str);
+ dst->end_time_str = "Crawl is already in progress";
+ dst->crawl_type = src->crawl_type;
+ dst->crawl_inprogress = _gf_true;
+ return;
+}
+
+static int
+afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict)
+{
+ shd_crawl_event_t *evnt = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd = &priv->shd;
+
+ evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t),
+ gf_afr_mt_shd_crawl_event_t);
+ if (!evnt) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+
+ reset_crawl_event (evnt);
+
+ if (!shd->crawl_events[i]) {
+ continue;
+ }
+
+ afr_copy_crawl_event_struct (shd->crawl_events[i],
+ evnt);
+ _add_crawl_stats_to_dict (this, dict, i, evnt, NULL);
+
+ }
+ }
+ UNLOCK (&priv->lock);
+ reset_crawl_event (evnt);
+ GF_FREE (evnt);
+
+out:
+ return ret;
+}
+
+static int
+_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_fill_completed_crawl_statistics_to_dict (this, dict);
+ ret = afr_fill_crawl_statistics_of_running_crawl (this, dict);
+ return ret;
+}
+int
+_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_eh_to_dict (this, eh, dict, i);
+ }
+ return 0;
+}
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int xl_id = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ ret = _do_self_heal_on_local_subvols (this, INDEX, output);
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ ret = _do_self_heal_on_local_subvols (this, FULL, output);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ (void)_get_index_summary_on_local_subvols (this, output);
+ ret = 0;
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->healed, output);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed,
+ output);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->split_brain,
+ output);
+ break;
+ case GF_AFR_OP_STATISTICS:
+ ret = _add_local_subvols_crawl_statistics_to_dict (this, output);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+ STATISTICS_TO_BE_HEALED,
+ output);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}
+
+void
+afr_poll_self_heal (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct timespec timeout = {0};
+ xlator_t *this = NULL;
+ long child = (long)data;
+ gf_timer_t *old_timer = NULL;
+ gf_timer_t *new_timer = NULL;
+ shd_pos_t pos_data = {0};
+ int ret = 0;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+
+ if (shd->pos[child] == AFR_POS_UNKNOWN) {
+ pos_data.this = this;
+ pos_data.child = child;
+ ret = synctask_new (this->ctx->env,
+ afr_syncop_find_child_position,
+ NULL, NULL, &pos_data);
+ if (!ret)
+ shd->pos[child] = pos_data.pos;
+ }
+ if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
+ _do_self_heal_on_subvol (this, child, INDEX);
+ timeout.tv_sec = shd->timeout;
+ timeout.tv_nsec = 0;
+ //notify and previous timer should be synchronized.
+ LOCK (&priv->lock);
+ {
+ old_timer = shd->timer[child];
+ if (shd->pos[child] == AFR_POS_REMOTE)
+ goto unlock;
+ shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
+ afr_poll_self_heal,
+ data);
+ new_timer = shd->timer[child];
+ }
+unlock:
+ UNLOCK (&priv->lock);
+
+ if (old_timer)
+ gf_timer_call_cancel (this->ctx, old_timer);
+ if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not create self-heal polling timer for %s",
+ priv->children[child]->name);
+ }
+ return;
+}
+
+static int
+afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data)
+{
+ afr_self_heald_t *shd = NULL;
+ shd_pos_t *pos_data = data;
+ afr_private_t *priv = NULL;
+
+ if (ret)
+ goto out;
+
+ priv = pos_data->this->private;
+ shd = &priv->shd;
+ shd->pos[pos_data->child] = pos_data->pos;
+ if (pos_data->pos != AFR_POS_REMOTE)
+ afr_poll_self_heal ((void*)(long)pos_data->child);
+ _do_self_heal_on_local_subvols (THIS, INDEX, NULL);
+out:
+ GF_FREE (data);
+ return 0;
+}
+
+void
+afr_proactive_self_heal (void *data)
+{
+ xlator_t *this = NULL;
+ long child = (long)data;
+ shd_pos_t *pos_data = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ //Position of brick could have changed and it could be local now.
+ //Compute the position again
+ pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+ if (!pos_data)
+ goto out;
+ pos_data->this = this;
+ pos_data->child = child;
+ ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+ afr_handle_child_up, NULL, pos_data);
+ if (ret)
+ goto out;
+out:
+ return;
+}
+
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
+
+ if (!pathinfo)
+ goto out;
+
+ start = strchr (pathinfo, ':');
+ if (!start)
+ goto out;
+ end = strrchr (pathinfo, ':');
+ if (start == end)
+ goto out;
+
+ memset (hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+ xlator_t *this = THIS;
+
+ *local = _gf_false;
+ ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+ pathinfo);
+ goto out;
+ }
+
+ ret = gethostname (localhost, sizeof (localhost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+ "reason: %s", strerror (errno));
+ goto out;
+ }
+
+ if (!strcmp (localhost, pathinfohost))
+ *local = _gf_true;
+out:
+ return ret;
+}
+
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+ loc_t *dirloc)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+ void *base_indices_holder_vgfid = NULL;
+ loc_t rootloc = {0};
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+ int ret = 0;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ afr_build_root_loc (this, dirloc);
+ } else if (crawl_data->crawl == INDEX) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+ "dir gfid on %s", readdir_xl->name);
+ goto out;
+ }
+ if (!index_gfid) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL,
+ &iattr, NULL, &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed on index dir on %s - (%s)",
+ readdir_xl->name, strerror (errno));
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_BASE_INDICES_HOLDER_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID,
+ &base_indices_holder_vgfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ if (!base_indices_holder_vgfid) {
+ gf_log (this->name, GF_LOG_ERROR, "Base indices holder"
+ "virtual gfid is null on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, base_indices_holder_vgfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL,
+ &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed for base_indices_holder dir"
+ " on %s - (%s)", readdir_xl->name,
+ strerror (errno));
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "base_indices"
+ "_holder is not yet created.");
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&rootloc);
+ return ret;
+}
+
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+ loc_t *dirloc)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ if (crawl_data->crawl == FULL) {
+ fd = fd_create (dirloc->inode, crawl_data->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", dirloc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s", dirloc->path);
+ goto out;
+ }
+ } else {
+ fd = fd_anonymous (dirloc->inode);
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *dirfd = fd;
+ return ret;
+}
+
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+{
+ afr_private_t *priv = this->private;
+
+ if (crawl_data->crawl == FULL) {
+ return this;
+ } else {
+ return priv->children[crawl_data->child];
+ }
+ return NULL;
+}
+
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+ gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ ret = afr_build_child_loc (this, child, parent, entry->d_name);
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ ret = _build_index_loc (this, child, entry->d_name, parent);
+ if (ret)
+ goto out;
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+ child->path = NULL;
+ } else {
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode)
+ goto out;
+ uuid_parse (entry->d_name, child->gfid);
+ ret = _loc_assign_gfid_path (child);
+ }
+out:
+ return ret;
+}
+
+static int
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ loc_t entry_loc = {0};
+ fd_t *fd = NULL;
+ struct iatt iattr = {0};
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ *offset = entry->d_off;
+ if (IS_ENTRY_CWD (entry->d_name) ||
+ IS_ENTRY_PARENT (entry->d_name))
+ continue;
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ parentloc->path, entry->d_name);
+ continue;
+ }
+
+ loc_wipe (&entry_loc);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
+ if (ret)
+ goto out;
+
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) {
+ goto out;
+ } else if (ret) {
+ continue;
+ }
+
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED))
+ continue;
+
+ if (!IA_ISDIR (iattr.ia_type))
+ continue;
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
+ if (ret)
+ continue;
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
+ }
+ ret = 0;
+out:
+ if ((crawl_data->crawl == INDEX_TO_BE_HEALED) && ret) {
+ gf_log (this->name, GF_LOG_ERROR,"Failed to get the hardlink "
+ "count");
+ }
+ loc_wipe (&entry_loc);
+ return ret;
+}
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
+{
+ xlator_t *this = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ if (crawl_data->crawl == FULL)
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
+ uuid_utoa (loc->gfid));
+
+ while (1) {
+ if (crawl_data->crawl == FULL)
+ ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset,
+ &entries);
+ if (ret <= 0)
+ break;
+ ret = 0;
+ free_entries = _gf_true;
+
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ if (list_empty (&entries.list))
+ goto out;
+
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
+ if ((ret < 0) && (crawl_data->crawl == INDEX_TO_BE_HEALED)) {
+ goto out;
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return ret;
+}
+
+static char*
+position_str_get (afr_child_pos_t pos)
+{
+ switch (pos) {
+ case AFR_POS_UNKNOWN:
+ return "unknown";
+ case AFR_POS_LOCAL:
+ return "local";
+ case AFR_POS_REMOTE:
+ return "remote";
+ }
+ return NULL;
+}
+
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ dict_t *xattr_rsp = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+ char *node_uuid = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ afr_build_root_loc (this, &loc);
+
+ ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
+ GF_XATTR_NODE_UUID_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
+ "(%s)", priv->children[child]->name, strerror (errno));
+ goto out;
+ }
+
+ ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
+ "child %s", priv->children[child]->name);
+ goto out;
+ }
+
+ if (!strcmp (node_uuid, shd->node_uuid))
+ *pos = AFR_POS_LOCAL;
+ else
+ *pos = AFR_POS_REMOTE;
+
+ gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
+ priv->children[child]->name, position_str_get (*pos));
+out:
+ if (ret)
+ *pos = AFR_POS_UNKNOWN;
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+afr_syncop_find_child_position (void *data)
+{
+ shd_pos_t *pos_data = data;
+ int ret = 0;
+
+ ret = afr_find_child_position (pos_data->this, pos_data->child,
+ &pos_data->pos);
+ return ret;
+}
+
+static int
+afr_dir_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ xlator_t *readdir_xl = NULL;
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ afr_crawl_data_t *crawl_data = data;
+
+ this = THIS;
+
+ if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
+ NULL))
+ goto out;
+
+ readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+ if (!readdir_xl)
+ goto out;
+ crawl_data->readdir_xl = readdir_xl;
+
+ ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
+ if (ret)
+ goto out;
+
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+ if (ret) {
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open base_"
+ "indices_holder");
+ }
+ goto out;
+ }
+
+ ret = _crawl_directory (fd, &dirloc, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+ readdir_xl->name);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
+ "on %s", readdir_xl->name);
+ if (crawl_data->crawl == INDEX)
+ dirloc.path = NULL;
+out:
+ if (fd)
+ fd_unref (fd);
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED ))
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
+ return ret;
+}
+
+char *
+get_crawl_type_in_string (afr_crawl_type_t crawl)
+{
+ char *index = "INDEX";
+ char *full = "FULL";
+ char *crawl_type = NULL;
+
+ if (crawl == INDEX){
+ crawl_type = index;
+ } else if (crawl == FULL) {
+ crawl_type = full;
+ }
+
+ return crawl_type;
+}
+
+static int
+afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ shd_crawl_event_t *crawl_event = NULL;
+ time_t get_time = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1,
+ gf_afr_mt_shd_crawl_event_t);
+ if (!crawl_event) {
+ ret = -1;
+ goto out;
+ }
+
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+
+ crawl_event->start_time_str = gf_strdup (ctime(&get_time));
+
+ crawl_event->crawl_type = get_crawl_type_in_string (crawl);
+ if (!crawl_event->crawl_type) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ shd->crawl_events[child] = crawl_event;
+ }
+ UNLOCK (&priv->lock);
+ ret = 0;
+out:
+ return ret;
+
+}
+
+static int
+afr_put_crawl_event_in_eh (xlator_t *this, int child)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ time_t get_time = 0;
+ shd_crawl_event_t **crawl_event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+ crawl_event = (shd_crawl_event_t**)shd->crawl_events;
+ LOCK (&priv->lock);
+ {
+ crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time));
+ ret = eh_save_history (shd->statistics[child],
+ crawl_event[child]);
+ crawl_event[child] = NULL;
+ }
+ UNLOCK (&priv->lock);
+out:
+ return ret;
+}
+
+static int
+afr_dir_exclusive_crawl (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t crawl = _gf_false;
+ int ret = 0;
+ int child = -1;
+ xlator_t *this = NULL;
+ afr_crawl_data_t *crawl_data = data;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+ child = crawl_data->child;
+
+ LOCK (&priv->lock);
+ {
+ if (shd->inprogress[child]) {
+ if (shd->pending[child] != FULL)
+ shd->pending[child] = crawl_data->crawl;
+ } else {
+ shd->inprogress[child] = _gf_true;
+ crawl = _gf_true;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ if (!crawl) {
+ gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
+ "for %s", priv->children[child]->name);
+ goto out;
+ }
+
+ do {
+ ret = afr_allocate_crawl_event (this, child, crawl_data->crawl);
+ if (ret)
+ goto out;
+ afr_dir_crawl (data);
+
+ ret = afr_put_crawl_event_in_eh (this, child);
+ if (ret < 0)
+ goto out;
+
+ LOCK (&priv->lock);
+ {
+ if (shd->pending[child] != NONE) {
+ crawl_data->crawl = shd->pending[child];
+ shd->pending[child] = NONE;
+ } else {
+ shd->inprogress[child] = _gf_false;
+ crawl = _gf_false;
+ }
+ }
+ UNLOCK (&priv->lock);
+ } while (crawl);
+out:
+ return ret;
+}
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_crawl_data_t *crawl_data = NULL;
+ int ret = 0;
+ int (*crawler) (void*) = NULL;
+
+ priv = this->private;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ afr_set_lk_owner (frame, this, frame->root);
+ afr_set_low_priority (frame);
+ crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
+ gf_afr_mt_crawl_data_t);
+ if (!crawl_data)
+ goto out;
+ crawl_data->process_entry = process_entry;
+ crawl_data->child = idx;
+ crawl_data->pid = frame->root->pid;
+ crawl_data->crawl = crawl;
+ crawl_data->op_data = op_data;
+ crawl_data->crawl_flags = crawl_flags;
+ gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
+ crawl_data->crawl, priv->children[idx]->name);
+
+ if (exclusive)
+ crawler = afr_dir_exclusive_crawl;
+ else
+ crawler = afr_dir_crawl;
+ ret = synctask_new (this->ctx->env, crawler,
+ crawl_done, frame, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "afr crawl failed for child"
+ " %d with ret %d", idx, ret);
+out:
+ return;
+}
+
+void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->path = gf_strdup ("/");
+ loc->name = "";
+ loc->inode = inode_ref (priv->root_inode);
+ uuid_copy (loc->gfid, loc->inode->gfid);
+}
+
+int
+afr_set_root_gfid (dict_t *dict)
+{
+ uuid_t gfid;
+ int ret = 0;
+
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+
+ ret = afr_set_dict_gfid (dict, gfid);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
new file mode 100644
index 000000000..e0c083754
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AFR_SELF_HEALD_H__
+#define __AFR_SELF_HEALD_H__
+#include "xlator.h"
+
+#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
+#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
+#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
+#define AFR_ALL_CHILDREN -1
+
+typedef struct afr_crawl_data_ {
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
+ void *op_data;
+ int crawl_flags;
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+} afr_crawl_data_t;
+
+typedef struct crawl_event_stats_ {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+ char *start_time_str;
+ char *end_time_str;
+ char *crawl_type;
+ gf_boolean_t crawl_inprogress;
+} shd_crawl_event_t;
+
+void _destroy_crawl_event_data (void *data);
+void _destroy_shd_event_data (void *data);
+
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+
+void afr_build_root_loc (xlator_t *this, loc_t *loc);
+
+int afr_set_root_gfid (dict_t *dict);
+
+void
+afr_proactive_self_heal (void *data);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+/*
+ * In addition to its self-heal use, this is used to find a local default
+ * read_child.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
+#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 80c96e887..20306e469 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,64 +1,108 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
+#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
#include <signal.h>
-#include <alloca.h>
+
+
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
+ of RENAME */
+#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret < 0 && fd_is_anonymous (fd)) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+out:
+ return fd_ctx;
+}
+
+
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
static void
-afr_pid_save (call_frame_t *frame)
+afr_save_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- local->saved_pid = frame->root->pid;
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-afr_pid_restore (call_frame_t *frame)
+afr_restore_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
static void
__mark_all_pending (int32_t *pending[], int child_count,
afr_transaction_type type)
-{
- int i;
- int j;
+{
+ int i = 0;
+ int j = 0;
for (i = 0; i < child_count; i++) {
j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
+ pending[i][j] = hton32 (1);
}
}
@@ -67,260 +111,170 @@ static void
__mark_child_dead (int32_t *pending[], int child_count, int child,
afr_transaction_type type)
{
- int j;
+ int j = 0;
j = afr_index_for_transaction_type (type);
-
- pending[child][j] = 0;
-}
-
-
-static void
-__mark_fop_failed_on_fd (fd_t *fd, xlator_t *this,
- int child_index)
-{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
- goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->child_failed[child_index] = 1;
-out:
- return;
+ pending[child][j] = 0;
}
static void
-__mark_failed_children (int32_t *pending[], int child_count,
- xlator_t *this, fd_t *fd, afr_transaction_type type)
+__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
+ local = frame->local;
- ret = fd_ctx_get (fd, this, &ctx);
+ if (!local->fd)
+ return;
- if (ret < 0)
- goto out;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ if (!fd_ctx)
+ goto out;
- if (fd_ctx->child_failed[i])
- pending[i][j] = 0;
+ LOCK (&local->fd->lock);
+ {
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ fd_ctx->pre_op_done[child_index]++;
}
-
+ UNLOCK (&local->fd->lock);
out:
return;
}
-
static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+__mark_non_participant_children (int32_t *pending[], int child_count,
+ unsigned char *participants,
+ afr_transaction_type type)
{
- int i;
- int j;
+ int i = 0;
+ int j = 0;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
-
- if (!child_up[i])
- pending[i][j] = 0;
+ j = afr_index_for_transaction_type (type);
+ for (i = 0; i < child_count; i++) {
+ if (!participants[i])
+ pending[i][j] = 0;
}
}
-static void
+void
__mark_all_success (int32_t *pending[], int child_count,
afr_transaction_type type)
{
- int i;
+ int i;
int j;
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < child_count; i++) {
j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
+ pending[i][j] = hton32 (-1);
}
}
-
-static int
-__is_first_write_on_fd (xlator_t *this, fd_t *fd)
+void
+_set_all_child_errno (int *child_errno, unsigned int child_count)
{
- int op_ret = 0;
- int _ret = -1;
+ int i = 0;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
-
- if (_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx on fd=%p",
- fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->pre_op_done == 0) {
- fd_ctx->pre_op_done = 1;
- op_ret = 1;
- }
- }
-out:
- UNLOCK (&fd->lock);
-
- return op_ret;
+ for (i = 0; i < child_count; i++)
+ if (child_errno[i] == 0)
+ child_errno[i] = ENOTCONN;
}
-
-static int
-__if_fd_pre_op_done (xlator_t *this, fd_t *fd)
+void
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- int op_ret = 0;
- int _ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
- if (_ret < 0) {
- goto out;
- }
+ _set_all_child_errno (local->child_errno, priv->child_count);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
- if (fd_ctx->pre_op_done) {
- fd_ctx->pre_op_done = 0;
- op_ret = 1;
- }
- }
-out:
- UNLOCK (&fd->lock);
- return op_ret;
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ local->transaction.fop (frame, this);
}
static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
-
- break;
-
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
-
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
-
- break;
-
- case AFR_FLUSH_TRANSACTION:
- ret = 1;
- }
+ int ret = 0;
- return ret;
-}
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ if (priv->data_change_log)
+ ret = 1;
+ break;
-static int
-__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- fd_t * fd = NULL;
+ case AFR_METADATA_TRANSACTION:
+ if (priv->metadata_change_log)
+ ret = 1;
- int op_ret = 0;
+ break;
- priv = this->private;
- local = frame->local;
-
- if (__changelog_enabled (priv, local->transaction.type)) {
- switch (local->op) {
-
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- /*
- if it's a data transaction, we write the changelog
- only on the first write on an fd
- */
-
- fd = local->fd;
- if (!fd || __is_first_write_on_fd (this, fd))
- op_ret = 1;
-
- break;
-
- case GF_FOP_FLUSH:
- /* only do post-op on flush() */
-
- op_ret = 0;
- break;
-
- default:
- op_ret = 1;
- }
- }
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (priv->entry_change_log)
+ ret = 1;
- return op_ret;
+ break;
+ }
+
+ return ret;
}
static int
-__changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
+__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int op_ret = 0;
- afr_transaction_type type = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = 0;
+ afr_transaction_type type = -1;
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
- if (__changelog_enabled (priv, type)) {
+ if (__changelog_enabled (priv, type)) {
switch (local->op) {
case GF_FOP_WRITE:
case GF_FOP_FTRUNCATE:
- op_ret = 0;
+ op_ret = 1;
break;
case GF_FOP_FLUSH:
- op_ret = __if_fd_pre_op_done (this, local->fd);
+ op_ret = 0;
break;
default:
@@ -328,817 +282,1536 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
}
}
- return op_ret;
+ return op_ret;
}
-
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op)
{
- int i;
+ int i = 0;
int ret = 0;
+ if (op == LOCAL_FIRST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
+ if (i == child)
+ continue;
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
/* 3 = data+metadata+entry */
-
+
if (ret < 0)
goto out;
}
-
+ if (op == LOCAL_LAST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
out:
return ret;
}
-
-static int
+int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
- int ret = 0;
-
- switch (type) {
- case AFR_FLUSH_TRANSACTION:
- case AFR_DATA_TRANSACTION:
- ret = priv->data_lock_server_count;
- break;
-
- case AFR_METADATA_TRANSACTION:
- ret = priv->metadata_lock_server_count;
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->entry_lock_server_count;
- break;
- }
+ int ret = 0;
- return ret;
-}
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
+
+ case AFR_METADATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = priv->child_count;
+ break;
+ }
+
+ return ret;
+}
-/* {{{ unlock */
+/* {{{ pending */
int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local;
- int call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.done (frame, this);
- }
-
- return 0;
+ if (call_count == 0) {
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
+ }
+
+ return 0;
}
-int
-afr_unlock (call_frame_t *frame, xlator_t *this)
+void
+afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, afr_transaction_type type)
{
- struct flock flock;
+ int i = -1;
+ int count = 0;
+ int read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int **pending = NULL;
+ int idx = 0;
+ int32_t *stale_children = NULL;
+ int32_t *fresh_children = NULL;
+ gf_boolean_t rm_stale_children = _gf_false;
+
+ idx = afr_index_for_transaction_type (type);
+
+ priv = this->private;
+ local = frame->local;
+ pending = local->pending;
- int i = 0;
- int call_count = 0;
+ if (local->op_ret < 0)
+ goto out;
+ fresh_children = local->fresh_children;
+ read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
+ if (read_child < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain "
+ "for %s", uuid_utoa (inode->gfid));
+ goto out;
+ }
- afr_local_t *local = NULL;
- afr_private_t * priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_is_child_present (fresh_children,
+ priv->child_count, i))
+ continue;
+ if (pending[i][idx])
+ continue;
+ /* child is down or op failed on it */
+ if (!stale_children)
+ stale_children = afr_children_create (priv->child_count);
+ if (!stale_children)
+ goto out;
- local = frame->local;
+ rm_stale_children = _gf_true;
+ stale_children[count++] = i;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale child "
+ "%d for %s", i, uuid_utoa (inode->gfid));
+ }
- /*
- pid has been restored to saved_pid in the fop,
- so set it back to frame->root
- */
+ if (!rm_stale_children)
+ goto out;
+
+ afr_inode_rm_stale_children (this, inode, stale_children);
+out:
+ GF_FREE (stale_children);
+ return;
+}
- frame->root->pid = (long) frame->root;
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
+{
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
- call_count = afr_locked_nodes_count (local->transaction.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.done (frame, this);
- return 0;
- }
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_UNLCK;
-
- if (local->transaction.locked_nodes[i]) {
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
-
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- call_count--;
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- }
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
+unsigned char*
+afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
+ break;
+ }
+ return locked_nodes;
+}
+
+int
+afr_changelog_pre_op_call_count (afr_transaction_type type,
+ afr_internal_lock_t *int_lock,
+ unsigned int child_count)
+{
+ int call_count = 0;
+ unsigned char *locked_nodes = NULL;
- return 0;
+ locked_nodes = afr_locked_nodes_get (type, int_lock);
+ GF_ASSERT (locked_nodes);
+
+ call_count = afr_locked_children_count (locked_nodes, child_count);
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
+
+ return call_count;
}
-/* }}} */
+int
+afr_changelog_post_op_call_count (afr_transaction_type type,
+ unsigned char *pre_op,
+ unsigned int child_count)
+{
+ int call_count = 0;
+ call_count = afr_pre_op_done_children_count (pre_op, child_count);
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
-/* {{{ pending */
+ return call_count;
+}
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+void
+afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int call_count = -1;
+ int i = 0;
+ int index = 0;
+ int32_t postop = 0;
+ int32_t preop = 1;
+ int32_t **txn_changelog = NULL;
+
+ txn_changelog = local->transaction.txn_changelog;
+ index = afr_index_for_transaction_type (local->transaction.type);
+ for (i = 0; i < priv->child_count; i++) {
+ postop = ntoh32 (local->pending[i][index]);
+ txn_changelog[i][index] = hton32 (postop + preop);
+ }
+}
- priv = this->private;
- local = frame->local;
+afr_xattrop_type_t
+afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child,
+ afr_transaction_type type)
+{
+ int index = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ index = afr_index_for_transaction_type (type);
+ if (optimized && !pending[child][index])
+ op = LOCAL_FIRST;
+ return op;
+}
- return 0;
+void
+afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
+ int optimized, int child)
+{
+ int32_t **txn_changelog = NULL;
+ int32_t **changelog = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
+
+ priv = this->private;
+ txn_changelog = local->transaction.txn_changelog;
+ op = afr_get_postop_xattrop_type (local->pending, optimized, child,
+ local->transaction.type);
+ if (optimized)
+ changelog = txn_changelog;
+ else
+ changelog = local->pending;
+ ret = afr_set_pending_dict (priv, xattr, changelog, child, op);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int index = -1;
+ int i = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- dict_t **xattr = NULL;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
+ index = afr_index_for_transaction_type (local->transaction.type);
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
-
- if (local->op == GF_FOP_FLUSH) {
- __mark_failed_children (local->pending, priv->child_count,
- this, local->fd,
- local->transaction.type);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->pending[i][index] == 0)
+ return _gf_false;
}
+ return _gf_true;
+}
+
+static void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+
+ if ((local->transaction.type != AFR_ENTRY_TRANSACTION) &&
+ (local->transaction.type != AFR_ENTRY_RENAME_TRANSACTION))
+ return;
+
+ if (local->op_ret >= 0)
+ goto out;
+
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+out:
+ return;
+}
+
+static void
+afr_data_handle_quota_errors (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t all_quota_failures = _gf_false;
+
+ local = frame->local;
+ priv = this->private;
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+ /*
+ * Idea is to not leave the file in FOOL-FOOL scenario in case on
+ * all the bricks data transaction failed with EDQUOT to avoid
+ * increasing un-necessary load of self-heals in the system.
+ */
+ all_quota_failures = _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ (local->child_errno[i] != EDQUOT)) {
+ all_quota_failures = _gf_false;
+ break;
+ }
+ }
+ if (all_quota_failures)
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+}
+
+int
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ afr_internal_lock_t *int_lock = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ afr_local_t * local = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ dict_t **xattr = NULL;
+ int piggyback = 0;
+ int nothing_failed = 1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ __mark_non_participant_children (local->pending, priv->child_count,
+ local->transaction.pre_op,
+ local->transaction.type);
+
+ afr_data_handle_quota_errors (frame, this);
+ afr_dir_fop_handle_all_fop_failures (frame);
+
+ if (local->fd)
+ afr_transaction_rm_stale_children (frame, this,
+ local->fd->inode,
+ local->transaction.type);
+
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ for (i = 0; i < priv->child_count; i++) {
+ xattr[i] = dict_new ();
}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_changelog_post_op_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
+ local->call_count = call_count;
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ if (local->fd)
+ fdctx = afr_fd_ctx_get (local->fd, this);
- local->call_count = call_count;
+ if (call_count == 0) {
+ /* no child is up */
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ goto out;
+ }
- if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ nothing_failed = afr_txn_nothing_failed (frame, this);
+
+ afr_compute_txn_changelog (local , priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ {
+ if (!fdctx) {
+ afr_set_postop_dict (local, this, xattr[i],
+ 0, i);
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ break;
+ }
+
+ /* local->transaction.postop_piggybacked[] was
+ precomputed in is_piggyback_postop() when called from
+ afr_changelog_post_op_safe()
+ */
+
+ piggyback = 0;
+ if (local->transaction.postop_piggybacked[i])
+ piggyback = 1;
+
+ afr_set_postop_dict (local, this, xattr[i],
+ piggyback, i);
+
+ if (nothing_failed && piggyback) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i], NULL);
+ } else {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_post_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
}
-
- afr_unlock (frame, this);
- return 0;
- }
+ break;
+ case AFR_METADATA_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
-
- call_count--;
- }
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ call_count--;
+ }
+
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
+ }
+ if (!--call_count)
+ break;
+ }
+
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
- return 0;
+ return 0;
}
int32_t
afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- loc_t * loc = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- loc = &local->loc;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->child_up[child_index] = 0;
-
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
-
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ afr_local_t * local = NULL;
+ afr_private_t * priv = this->private;
+ int call_count = -1;
+ int child_index = (long) cookie;
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ local = frame->local;
- afr_pid_restore (frame);
+ LOCK (&frame->lock);
+ {
+ switch (op_ret) {
+ case 0:
+ __mark_pre_op_done_on_fd (frame, this, child_index);
+ //fallthrough we need to mark the pre_op
+ case 1:
+ local->transaction.pre_op[child_index] = 1;
+ /* special op_ret for piggyback */
+ break;
+ case -1:
+ if (op_errno == ENOTSUP) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop not supported by %s",
+ priv->children[child_index]->name);
+ local->op_ret = -1;
+
+ } else if (!child_went_down (op_ret, op_errno)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop failed on child %s: %s",
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ local->op_errno = op_errno;
+ break;
+ }
- local->transaction.fop (frame, this);
- }
- }
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ if ((local->op_ret == -1) &&
+ (local->op_errno == ENOTSUP)) {
+ local->transaction.resume (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+ }
- return 0;
+ return 0;
}
-
-int
+int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
-
- int i = 0;
- int ret = 0;
- int call_count = 0;
- dict_t **xattr = NULL;
+ afr_private_t * priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ dict_t **xattr = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ int piggyback = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
- afr_local_t *local = NULL;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- local = frame->local;
-
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ for (i = 0; i < priv->child_count; i++) {
+ xattr[i] = dict_new ();
}
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_changelog_pre_op_call_count (local->transaction.type,
+ int_lock,
+ priv->child_count);
+ if (call_count == 0) {
+ local->internal_lock.lock_cbk =
+ local->transaction.done;
+ afr_unlock (frame, this);
+ goto out;
+ }
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ local->call_count = call_count;
- if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ __mark_all_pending (local->pending, priv->child_count,
+ local->transaction.type);
+
+ if (local->fd)
+ fdctx = afr_fd_ctx_get (local->fd, this);
+
+ locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_nodes[i])
+ continue;
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
+
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
+
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ {
+ if (!fdctx) {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &(local->loc),
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ break;
+ }
+
+ LOCK (&local->fd->lock);
+ {
+ piggyback = 0;
+ if (fdctx->pre_op_done[i]) {
+ fdctx->pre_op_piggyback[i]++;
+ piggyback = 1;
+ fdctx->hit++;
+ } else {
+ fdctx->miss++;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ afr_set_delayed_post_op (frame, this);
+
+ if (piggyback)
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
-
- afr_unlock (frame, this);
- return 0;
- }
+ break;
+ case AFR_METADATA_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &(local->loc),
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
- local->call_count = call_count;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+
+ call_count--;
+ }
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
-
- call_count--;
- }
-
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
-
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
-
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
+
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
+ }
+
+ if (!--call_count)
+ break;
+ }
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
-
- return 0;
+
+ return 0;
+}
+
+
+int
+afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking inodelks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking inodelks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
}
+
+int
+afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking entrylks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking entrylks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+ return 0;
+}
+
+
+int
+afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ GF_ASSERT (!int_lock->higher_locked);
+
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_set_transaction_flock (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+
+ int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
+
+ return 0;
+}
+
+int
+afr_lock_rec (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ afr_set_transaction_flock (local);
+
+ int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
+
+ afr_nonblocking_inodelk (frame, this);
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (&local->transaction.parent_loc)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT (local->fd);
+
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+afr_lock (call_frame_t *frame, xlator_t *this)
+{
+ afr_set_lock_number (frame, this);
+
+ return afr_lock_rec (frame, this);
+}
+
+
/* }}} */
-/* {{{ lock */
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+{
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_pre_op (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index);
-int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int done = 0;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int call_count = 0;
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
- local = frame->local;
- priv = this->private;
+ - changelog piggybacking
+ - eager locking
+ */
- LOCK (&frame->lock);
- {
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- /* wait for the other lock to return */
- call_count = --local->call_count;
- }
-
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
- local->op_ret = op_ret;
- done = 1;
- }
-
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOSYS)) {
- afr_unlock (frame, this);
- } else {
- local->transaction.locked_nodes[child_index] = 1;
- local->transaction.lock_count++;
- afr_lock_rec (frame, this, child_index + 1);
- }
- }
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local->transaction.eager_lock_on)
+ return;
- return 0;
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ictx = NULL;
+
+ if (!inode) {
+ /* If false is returned, it may keep on taking eager-lock
+ * which may lead to starvation, so return true to avoid that.
+ */
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+ return _gf_true;
+ }
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1
+ */
+
+ ictx = afr_inode_ctx_get (inode, this);
+ if (!ictx)
+ return _gf_true;
+
+ if (ictx->open_fd_count > 1)
+ return _gf_true;
+
+ return _gf_false;
}
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv)
+{
+ if (local->success_count != priv->child_count)
+ return _gf_true;
+ return _gf_false;
+}
-static loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
- ret = strcmp (l1->path, l2->path);
-
- if (ret == 0)
- ret = strcmp (b1, b2);
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (!local->delayed_post_op)
+ goto out;
+
+ //Mark pending changelog ASAP
+ if (afr_any_fops_failed (local, priv))
+ goto out;
- if (ret <= 0)
- return l1;
- else
- return l2;
+ if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
}
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
- struct flock flock;
+ fd = data;
- loc_t * lower = NULL;
- loc_t * higher = NULL;
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
- const char *lower_name = NULL;
- const char *higher_name = NULL;
- local = frame->local;
- priv = this->private;
+/*
+ Check if the frame is destined to get optimized away
+ with changelog piggybacking
+*/
+static gf_boolean_t
+is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t piggyback = _gf_true;
+ afr_private_t *priv = NULL;
+ int i = 0;
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_WRLCK;
+ priv = frame->this->private;
+ local = frame->local;
+ fdctx = afr_fd_ctx_get (fd, frame->this);
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
+ LOCK(&fd->lock);
+ {
+ piggyback = _gf_true;
- if ((child_index == priv->child_count) &&
- local->transaction.lock_count == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+ if (fdctx->pre_op_piggyback[i]) {
+ fdctx->pre_op_piggyback[i]--;
+ local->transaction.postop_piggybacked[i] = 1;
+ } else {
+ /* For at least _one_ subvolume we cannot
+ piggyback on the changelog, and have to
+ perform a hard POST-OP and therefore fsync
+ if necesssary
+ */
+ piggyback = _gf_false;
+ GF_ASSERT (fdctx->pre_op_done[i]);
+ fdctx->pre_op_done[i]--;
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to lock on even one child");
+ if (!afr_txn_nothing_failed (frame, frame->this)) {
+ /* something failed in this transaction,
+ we will be performing a hard post-op
+ */
+ return _gf_false;
+ }
- local->op_ret = -1;
- local->op_errno = EAGAIN;
+ return piggyback;
+}
- local->transaction.done (frame, this);
-
- return 0;
- }
+/* SET operation */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
- if ((child_index == priv->child_count)
- || (local->transaction.lock_count ==
- afr_lock_server_count (priv, local->transaction.type))) {
+ fdctx = afr_fd_ctx_get (fd, this);
- /* we're done locking */
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&fd->lock);
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ return 0;
+}
- afr_pid_restore (frame);
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
- local->transaction.fop (frame, this);
- }
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ return _gf_true;
- return 0;
- }
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return witness;
+}
+
+
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+ int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ if (is_piggyback_post_op (frame, local->fd)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
-
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync (frame, this);
+ } else {
+ afr_changelog_post_op_now (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
{
- local->call_count = 2;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
-
- /* TODO: these locks should be blocking */
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- }
-
- break;
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
}
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
- return 0;
+out:
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_safe (prev_frame, this);
+ } else if (stub) {
+ call_resume (stub);
+ }
}
-int32_t afr_lock (call_frame_t *frame, xlator_t *this)
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
+ afr_local_t *local = NULL;
- frame->root->pid = (long) frame->root;
+ local = frame->local;
- return afr_lock_rec (frame, this, 0);
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
}
-/* }}} */
-int32_t
+/* Wake up the sleeping/delayed post-op, and also register
+ a stub to have it resumed after this transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+}
+
+
+ int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+ }
- local = frame->local;
- priv = this->private;
-
- if (__changelog_needed_post_op (frame, this)) {
- afr_changelog_post_op (frame, this);
- } else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ afr_restore_lk_owner (frame);
- return 0;
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_post_op (frame, this);
+ } else {
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
+ }
+
+ return 0;
}
@@ -1147,54 +1820,144 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- switch (local->op) {
- case GF_FOP_WRITE:
- __mark_fop_failed_on_fd (local->fd, this, child_index);
- break;
- default:
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
- break;
+ __mark_child_dead (local->pending, priv->child_count,
+ child_index, local->transaction.type);
+}
+
+
+
+ static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
+
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
+
+ priv = this->private;
+
+ if (!local->fd)
+ return;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+
+ if (!priv->eager_lock)
+ return;
+
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
+
+ if (afr_are_multiple_fds_opened (local->fd->inode, this))
+ return;
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
}
+unlock:
+ UNLOCK (&local->fd->lock);
}
-int32_t
+int
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- afr_transaction_local_init (local, priv);
+ local->transaction.resume = afr_transaction_resume;
+ local->transaction.type = type;
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ afr_transaction_eager_lock_init (local, this);
- afr_pid_restore (frame);
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
- local->transaction.fop (frame, this);
- }
- } else {
- afr_lock (frame, this);
- }
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
- return 0;
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ afr_internal_lock_finish (frame, this);
+ } else {
+ afr_lock (frame, this);
+ }
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index c7a6490e7..fa626fd0d 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,30 +1,51 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+typedef enum {
+ LOCAL_FIRST = 1,
+ LOCAL_LAST = 2
+} afr_xattrop_type_t;
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
+int
+afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (int32_t *pending[], int child_count,
+ afr_transaction_type type);
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 31c81ffd4..c724eb2ae 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -28,2579 +19,775 @@
#define _CONFIG_H
#include "config.h"
#endif
+#include "afr-common.c"
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-
-#include "afr-self-heal.h"
-
-
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- split_brain = ctx & 0xFFFFFFFF00000000ULL;
- }
-unlock:
- UNLOCK (&inode->lock);
-
- return split_brain;
-}
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+struct volume_options options[];
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain)
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
- if (ret < 0) {
- ctx = 0;
- }
+ return ret;
+}
- ctx = (0x00000000FFFFFFFFULL & ctx)
- | (split_brain & 0xFFFFFFFF00000000ULL);
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-}
+ if (!this)
+ return ret;
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t read_child = 0;
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- read_child = ctx & 0x00000000FFFFFFFFULL;
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
}
-unlock:
- UNLOCK (&inode->lock);
- return read_child;
+ return ret;
}
-void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+int
+xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ list = this->children;
- if (ret < 0) {
- ctx = 0;
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp (subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
-
- ctx = (0xFFFFFFFF00000000ULL & ctx)
- | (0x00000000FFFFFFFFULL & read_child);
-
- __inode_ctx_put (inode, this, ctx);
+ list = list->next;
+ i++;
}
- UNLOCK (&inode->lock);
-}
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
- */
-
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->buf)
- FREE (sh->buf);
-
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- FREE (sh->xattr);
- }
-
- if (sh->child_errno)
- FREE (sh->child_errno);
-
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- FREE (sh->pending_matrix[i]);
- }
- FREE (sh->pending_matrix);
- }
-
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- FREE (sh->delta_matrix[i]);
- }
- FREE (sh->delta_matrix);
- }
-
- if (sh->sources)
- FREE (sh->sources);
-
- if (sh->success)
- FREE (sh->success);
-
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- loc_wipe (&sh->parent_loc);
+ return index;
}
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
{
- int i;
- afr_private_t * priv = NULL;
-
- if (!local)
- return;
-
- afr_local_sh_cleanup (local, this);
-
- FREE (local->child_errno);
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- FREE (local->pending[i]);
+ if (priv->quorum_count && strcmp(qtype,"fixed")) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
+ }
+ if (!strcmp(qtype,"none")) {
+ priv->quorum_count = 0;
+ }
+ else if (!strcmp(qtype,"auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
}
-
- FREE (local->pending);
-
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
-
- FREE (local->transaction.locked_nodes);
- FREE (local->transaction.child_errno);
-
- FREE (local->transaction.basename);
- FREE (local->transaction.new_basename);
-
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
-
- FREE (local->child_up);
-
- { /* lookup */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
- }
-
- { /* getxattr */
- if (local->cont.getxattr.name)
- FREE (local->cont.getxattr.name);
- }
-
- { /* lk */
- if (local->cont.lk.locked_nodes)
- FREE (local->cont.lk.locked_nodes);
- }
-
- { /* checksum */
- if (local->cont.checksum.file_checksum)
- FREE (local->cont.checksum.file_checksum);
- if (local->cont.checksum.dir_checksum)
- FREE (local->cont.checksum.dir_checksum);
- }
-
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- }
-
- { /* writev */
- FREE (local->cont.writev.vector);
- }
-
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
-
- { /* removexattr */
- FREE (local->cont.removexattr.name);
- }
-
- { /* symlink */
- FREE (local->cont.symlink.linkpath);
- }
}
-
int
-afr_frame_return (call_frame_t *frame)
+reconfigure (xlator_t *this, dict_t *options)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
- local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ GF_OPTION_RECONF ("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32,
+ out);
- return call_count;
-}
+ GF_OPTION_RECONF ("metadata-self-heal",
+ priv->metadata_self_heal, options, bool, out);
-/**
- * first_up_child - return the index of the first child that is up
- */
+ GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
+ out);
-int
-afr_first_up_child (afr_private_t *priv)
-{
- xlator_t ** children = NULL;
- int ret = -1;
- int i = 0;
-
- LOCK (&priv->lock);
- {
- children = priv->children;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i]) {
- ret = i;
- break;
- }
- }
- }
- UNLOCK (&priv->lock);
-
- return ret;
-}
+ GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
+ bool, out);
+ GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
+ out);
-/**
- * up_children_count - return the number of children that are up
- */
+ GF_OPTION_RECONF ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options,
+ uint32, out);
-int
-afr_up_children_count (int child_count, unsigned char *child_up)
-{
- int i = 0;
- int ret = 0;
+ GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
+ bool, out);
- for (i = 0; i < child_count; i++)
- if (child_up[i])
- ret++;
- return ret;
-}
+ GF_OPTION_RECONF ("metadata-change-log",
+ priv->metadata_change_log, options, bool, out);
+ GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
+ bool, out);
-int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
-{
- int ret = 0;
- int i;
+ GF_OPTION_RECONF ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, options, str, out);
- for (i = 0; i < child_count; i++)
- if (locked_nodes[i])
- ret++;
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
- return ret;
-}
+ GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
-{
- ino64_t scaled_ino = -1;
+ if (read_subvol) {
+ index = xlator_subvolume_index (this, read_subvol);
+ if (index == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
+ goto out;
+ }
+ priv->read_child = index;
+ }
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
- scaled_ino = (ino * child_count) + child_index;
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+ priv->did_discovery = _gf_false;
+
+ ret = 0;
out:
- return scaled_ino;
-}
-
-
-int
-afr_deitransform_orig (ino64_t ino, int child_count)
-{
- int index = -1;
-
- index = ino % child_count;
-
- return index;
-}
-
-
-int
-afr_deitransform (ino64_t ino, int child_count)
-{
- return 0;
-}
-
-
-int
-afr_self_heal_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, 1);
- } else {
- afr_set_split_brain (this, local->cont.lookup.inode, 0);
- }
-
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr);
+ return ret;
- return 0;
}
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct stat * lookup_buf = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- uint32_t open_fd_count = 0;
- int ret = 0;
-
- child_index = (long) cookie;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- lookup_buf = &local->cont.lookup.buf;
-
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (op_errno != ENOTCONN) {
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- }
-
- if (op_errno == ESTALE) {
- /* no matter what other subvolumes return for
- * this call, ESTALE _must_ be sent to parent
- */
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
- goto unlock;
- }
-
- if (afr_sh_has_metadata_pending (xattr, child_index, this))
- local->need_metadata_self_heal = 1;
-
- if (afr_sh_has_entry_pending (xattr, child_index, this))
- local->need_entry_self_heal = 1;
-
- if (afr_sh_has_data_pending (xattr, child_index, this))
- local->need_data_self_heal = 1;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- local->open_fd_count += open_fd_count;
-
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
-
- /* inode number should be preserved across revalidates */
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode;
- local->cont.lookup.xattr = dict_ref (xattr);
-
- *lookup_buf = *buf;
-
- lookup_buf->st_ino = afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- -- Govinda !! GOvinda !!!
- */
- local->govinda_gOvinda = 1;
- }
-
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->need_metadata_self_heal = 1;
- }
-
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->need_metadata_self_heal = 1;
- }
-
- if (SIZE_DIFFERS (buf, lookup_buf)
- && S_ISREG (buf->st_mode)) {
- local->need_data_self_heal = 1;
- }
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.buf.st_ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- first_up_child);
- }
-
- if (child_index == local->read_child_index) {
-
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.inode = inode;
- local->cont.lookup.xattr = dict_ref (xattr);
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino)
- lookup_buf->st_ino =
- local->cont.lookup.inode->ino;
- }
-
- if (local->success_count && local->enoent_count) {
- local->need_metadata_self_heal = 1;
- local->need_data_self_heal = 1;
- local->need_entry_self_heal = 1;
- }
-
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this,
- local->cont.lookup.inode))
- local->need_data_self_heal = 1;
- }
-
- if ((local->need_metadata_self_heal
- || local->need_data_self_heal
- || local->need_entry_self_heal)
- && (!local->open_fd_count)) {
-
- if (!local->cont.lookup.inode->st_mode) {
- /* fix for RT #602 */
- local->cont.lookup.inode->st_mode =
- lookup_buf->st_mode;
- }
-
- afr_self_heal (frame, this, afr_self_heal_cbk);
- } else {
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr);
- }
- }
-
- return 0;
-}
+static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
+ "as the 'favorite child'. This means that if a discrepancy in the content "
+ "or attributes (ownership, permission, etc.) of a file is detected among "
+ "the subvolumes, the file on '%s' will be considered the definitive "
+ "version and its contents will OVERWRITE the contents of the file on other "
+ "subvolumes. All versions of the file except that on '%s' "
+ "WILL BE LOST.";
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+int32_t
+init (xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
-
- uint64_t ctx;
-
- int32_t op_errno = 0;
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- local->op_ret = -1;
-
- frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
-
- local->read_child_index = afr_read_child (this, loc->inode);
- } else {
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
}
- local->call_count = priv->child_count;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
- local->child_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
-
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
-
- /* 3 = data+metadata+entry */
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
}
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
-
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- }
-
- ret = 0;
-out:
- if (ret == -1)
- AFR_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-
-/* {{{ open */
-
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
-
- int op_ret = 0;
- int ret = 0;
-
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto out;
+ child_count = xlator_subvolume_count (this);
- fd_ctx = CALLOC (1, sizeof (afr_fd_ctx_t));
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto out;
- }
+ priv->child_count = child_count;
- fd_ctx->child_failed = CALLOC (sizeof (*fd_ctx->child_failed),
- priv->child_count);
-
- if (!fd_ctx->child_failed) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ priv->read_child = -1;
- op_ret = -ENOMEM;
+ GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index (this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
goto out;
}
-
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret < 0) {
- op_ret = ret;
+ }
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
}
-out:
- UNLOCK (&fd->lock);
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
- return ret;
-}
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
-
-int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_local_t * local = frame->local;
- int ret = 0;
-
- ret = afr_fd_ctx_set (this, local->fd);
-
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = -ret;
+ priv->favorite_child = -1;
+ GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
+ if (fav_child) {
+ priv->favorite_child = xlator_subvolume_index (this, fav_child);
+ if (priv->favorite_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ fav_child->name);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_WARNING,
+ favorite_child_warning_str, fav_child->name,
+ fav_child->name, fav_child->name);
}
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
- return 0;
-}
-
-
-int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int ret = 0;
-
- int call_count = -1;
-
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0);
- } else {
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set fd ctx for fd=%p",
- fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- }
-
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int i = 0;
- int ret = -1;
-
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- op_errno = EIO;
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- local->cont.open.flags = flags;
- local->fd = fd_ref (fd);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, wind_flags, fd);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
-
- return 0;
-}
-/* }}} */
+ GF_OPTION_INIT ("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
-/* {{{ flush */
+ GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
-int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-
-int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
+ out);
+ GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
- local = frame->local;
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
- local->transaction.unwind (frame, this);
+ GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
- AFR_STACK_DESTROY (frame);
+ GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
+ out);
- return 0;
-}
+ GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
+ bool, out);
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
- call_frame_t * transaction_frame = NULL;
+ GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- int ret = -1;
+ GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
- int op_ret = -1;
- int op_errno = 0;
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
+ fix_quorum_options(this,priv,qtype);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
- priv = this->private;
+ priv->wait_count = 1;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ ret = -ENOMEM;
+ goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
-
- return 0;
-}
-
-/* }}} */
-
-
-int
-afr_release (xlator_t *this, fd_t *fd)
-{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx;
-
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
+ for (i = 0; i < child_count; i++)
+ priv->child_up[i] = -1; /* start with unknown state.
+ this initialization needed
+ for afr_notify() to work
+ reliably
+ */
+
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
goto out;
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx) {
- if (fd_ctx->child_failed)
- FREE (fd_ctx->child_failed);
-
- FREE (fd_ctx);
}
-
-out:
- return 0;
-}
-
-
-/* {{{ fsync */
-
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsync_cbk,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fsync */
-
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsync_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ xattrop */
-
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fxattrop */
-
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_checksum_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- uint8_t *file_checksum, uint8_t *dir_checksum)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0 && (local->op_ret != 0)) {
- local->op_ret = 0;
-
- local->cont.checksum.file_checksum = MALLOC (NAME_MAX);
- memcpy (local->cont.checksum.file_checksum, file_checksum,
- NAME_MAX);
-
- local->cont.checksum.dir_checksum = MALLOC (NAME_MAX);
- memcpy (local->cont.checksum.dir_checksum, dir_checksum,
- NAME_MAX);
-
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->cont.checksum.file_checksum,
- local->cont.checksum.dir_checksum);
-
- return 0;
-}
-
-
-int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_checksum_cbk,
- priv->children[i],
- priv->children[i]->fops->checksum,
- loc, flag);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
-{
- afr_local_t *local = NULL;
-
- int call_count = 0;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
-
- return 0;
-}
-
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
-
- int ret = -1;
- int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t * local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- lock);
-
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- return 0;
- }
-
- local->call_count = call_count;
-
- local->cont.lk.flock.l_type = F_UNLCK;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- call_count = --local->call_count;
-
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- afr_lk_unlock (frame, this);
- return 0;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.flock = *lock;
- local->cont.lk.locked_nodes[child_index] = 1;
- }
-
- child_index++;
-
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.flock);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
-
- AFR_STACK_UNWIND (frame, -1, ENOTCONN, &local->cont.lk.flock);
- } else {
- /* locking has succeeded on all nodes that are up */
-
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- }
-
- return 0;
-}
-
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int i = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
-
- local->cont.lk.locked_nodes = CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes));
-
- if (!local->cont.lk.locked_nodes) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
-{
- afr_private_t *priv = NULL;
-
- int i = -1;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
- }
-
- return i;
-}
-
-
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
-
- int i = -1;
- int up_children = 0;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- child_up = priv->child_up;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
-
- child_up[i] = 1;
-
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
-
- default_notify (this, event, data);
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+ AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
}
- break;
-
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
-
- child_up[i] = 0;
-
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
+ trav = trav->next;
+ i++;
+ }
- if (up_children == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
- default_notify (this, event, data);
- }
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
- break;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
- default:
- default_notify (this, event, data);
- }
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
- return 0;
-}
+ if (!priv->shd.iamshd) {
+ ret = 0;
+ goto out;
+ }
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
-
-static const char *no_lock_servers_warning_str = "You have set lock-server-count = 0. "
- "This means correctness is NO LONGER GUARANTEED in all cases. If two or more "
- "applications write to the same region of a file, there is a possibility that "
- "its copies will be INCONSISTENT. Set it to a value greater than 0 unless you "
- "are ABSOLUTELY SURE of what you are doing and WILL NOT HOLD GlusterFS "
- "RESPONSIBLE for inconsistent data. If you are in doubt, set it to a value "
- "greater than 0.";
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- int op_errno = 0;
-
- char * read_subvol = NULL;
- char * fav_child = NULL;
- char * self_heal = NULL;
- char * change_log = NULL;
-
- int32_t lock_server_count = 1;
-
- int fav_ret = -1;
- int read_ret = -1;
- int dict_ret = -1;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
-
- priv = this->private;
-
- read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
- priv->read_child = -1;
-
- fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
- priv->favorite_child = -1;
-
- /* Default values */
-
- priv->data_self_heal = 1;
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->data_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-self-heal %s'. "
- "Defaulting to data-self-heal as 'on'",
- self_heal);
- priv->data_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->metadata_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-self-heal %s'. "
- "Defaulting to metadata-self-heal as 'on'.",
- self_heal);
- priv->metadata_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-self-heal %s'. "
- "Defaulting to entry-self-heal as 'on'.",
- self_heal);
- priv->entry_self_heal = 1;
- }
- }
-
- /* Change log options */
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 0;
- priv->entry_change_log = 1;
-
- dict_ret = dict_get_str (this->options, "data-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->data_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-change-log %s'. "
- "Defaulting to data-change-log as 'on'.",
- change_log);
- priv->data_change_log = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log,
- &priv->metadata_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-change-log %s'. "
- "Defaulting to metadata-change-log as 'off'.",
- change_log);
- priv->metadata_change_log = 0;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->entry_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-change-log %s'. "
- "Defaulting to entry-change-log as 'on'.",
- change_log);
- priv->entry_change_log = 1;
- }
- }
-
- /* Locking options */
-
- priv->data_lock_server_count = 1;
- priv->metadata_lock_server_count = 0;
- priv->entry_lock_server_count = 1;
-
- dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data lock server count to %d.",
- lock_server_count);
-
- if (lock_server_count == 0)
- gf_log (this->name, GF_LOG_WARNING,
- no_lock_servers_warning_str);
-
- priv->data_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options,
- "metadata-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting metadata lock server count to %d.",
- lock_server_count);
- priv->metadata_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting entry lock server count to %d.",
- lock_server_count);
-
- priv->entry_lock_server_count = lock_server_count;
- }
-
- trav = this->children;
- while (trav) {
- if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Subvolume '%s' specified as read child.",
- trav->xlator->name);
-
- priv->read_child = child_count;
- }
-
- if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, trav->xlator->name,
- trav->xlator->name, trav->xlator->name);
- priv->favorite_child = child_count;
- }
-
- child_count++;
- trav = trav->next;
- }
-
- priv->wait_count = 1;
-
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
+ priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
+ goto out;
- priv->child_up = CALLOC (sizeof (unsigned char), child_count);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->children = CALLOC (sizeof (xlator_t *), child_count);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->pending_key = CALLOC (sizeof (*priv->pending_key), child_count);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
+ goto out;
+ priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
goto out;
- }
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.healed)
+ goto out;
- asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
- trav->xlator->name);
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.heal_failed)
+ goto out;
- trav = trav->next;
- i++;
- }
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.split_brain)
+ goto out;
- ret = 0;
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+ priv->root_inode = inode_ref (this->itable->root);
+ GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
+ GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ ret = afr_initialise_statistics (this);
+ if (ret)
+ goto out;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
fini (xlator_t *this)
{
- return 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
+ return 0;
}
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
- .checksum = afr_checksum,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .readv = afr_readv,
-
- /* inode write */
- .chmod = afr_chmod,
- .chown = afr_chown,
- .fchmod = afr_fchmod,
- .fchown = afr_fchown,
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .utimens = afr_utimens,
- .setxattr = afr_setxattr,
- .removexattr = afr_removexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .getdents = afr_getdents,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
- .setdents = afr_setdents,
+ .lookup = afr_lookup,
+ .open = afr_open,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsync = afr_fsync,
+ .fsyncdir = afr_fsyncdir,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+
+ /* dir read */
+ .opendir = afr_opendir,
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-struct xlator_mops mops = {
+struct xlator_dumpops dumpops = {
+ .priv = afr_priv_dump,
};
struct xlator_cbks cbks = {
.release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {"metadata-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {"entry-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {NULL} },
+ { .key = {"read-subvolume" },
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+ { .key = {"read-hash-mode" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 2,
+ .default_value = "0",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option"
+ "0 = first responder, "
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume), "
+ "2 = hash by GFID of file and client PID",
+ },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume(i.e. Brick) to read from if "
+ "read-subvolume is not explicitly set.",
+ },
+ { .key = {"favorite-child"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
+ },
+ { .key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
+ },
+ { .key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable",
+ "0", "off", "no", "false", "disable",
+ "open"},
+ .default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
+ },
+ { .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
+ },
+ { .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."
+ },
+ { .key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
+ },
+ { .key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
+ },
+ { .key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
+ },
+ { .key = {"strict-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
+ },
+ { .key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "aquistion fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimzed\" transaction."
+
+ },
+ { .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files"
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks or present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 2871bfc47..21064db58 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,85 +17,347 @@
#include "config.h"
#endif
-#include "scheduler.h"
#include "call-stub.h"
#include "compat-errno.h"
+#include "afr-mem-types.h"
+#include "afr-self-heal-algorithm.h"
+
+#include "libxlator.h"
+#include "timer.h"
#define AFR_XATTR_PREFIX "trusted.afr"
+#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
-typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
-
- xlator_t **children;
+struct _pump_private;
- unsigned char *child_up;
+typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int child, int32_t op_error,
+ int32_t op_errno);
- char **pending_key;
+typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int32_t op_error, int32_t op_errno);
+typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
- gf_boolean_t data_self_heal; /* on/off */
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno);
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
+typedef enum {
+ AFR_POS_UNKNOWN,
+ AFR_POS_LOCAL,
+ AFR_POS_REMOTE
+} afr_child_pos_t;
- int read_child; /* read-subvolume */
- unsigned int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
+typedef enum {
+ SPLIT_BRAIN = 1,
+ ALL_FOOLS = 2
+} afr_subvol_status_t;
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
+typedef enum {
+ AFR_INODE_SET_READ_CTX = 1,
+ AFR_INODE_RM_STALE_CHILDREN,
+ AFR_INODE_SET_OPENDIR_DONE,
+ AFR_INODE_GET_READ_CTX,
+ AFR_INODE_GET_OPENDIR_DONE,
+} afr_inode_op_t;
+
+typedef struct afr_inode_params_ {
+ afr_inode_op_t op;
+ union {
+ gf_boolean_t value;
+ struct {
+ int32_t read_child;
+ int32_t *children;
+ } read_ctx;
+ } u;
+} afr_inode_params_t;
+
+typedef enum afr_spb_state {
+ DONT_KNOW,
+ SPB,
+ NO_SPB
+} afr_spb_state_t;
+
+typedef struct afr_inode_ctx_ {
+ uint64_t masks;
+ int32_t *fresh_children;//increasing order of latency
+ afr_spb_state_t mdata_spb;
+ afr_spb_state_t data_spb;
+ uint32_t open_fd_count;
+} afr_inode_ctx_t;
- unsigned int wait_count; /* # of servers to wait for success */
-} afr_private_t;
+typedef enum {
+ NONE,
+ INDEX,
+ INDEX_TO_BE_HEALED,
+ FULL,
+} afr_crawl_type_t;
+
+typedef struct afr_self_heald_ {
+ gf_boolean_t enabled;
+ gf_boolean_t iamshd;
+ afr_crawl_type_t *pending;
+ gf_boolean_t *inprogress;
+ afr_child_pos_t *pos;
+ gf_timer_t **timer;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
+ eh_t **statistics;
+ void **crawl_events;
+ char *node_uuid;
+ int timeout;
+} afr_self_heald_t;
-typedef struct {
- /* array of stat's, one for each child */
- struct stat *buf;
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+
+ unsigned int read_child_rr; /* round-robin index of the read_child */
+ gf_lock_t read_child_lock; /* lock to protect above */
+
+ xlator_t **children;
- /* array of xattr's, one for each child */
- dict_t **xattr;
+ int first_lookup;
+ inode_t *root_inode;
- /* array of errno's, one for each child */
- int *child_errno;
+ unsigned char *child_up;
- int32_t **pending_matrix;
- int32_t **delta_matrix;
+ char **pending_key;
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- int *success;
+ char *data_self_heal; /* on/off/open */
+ char * data_self_heal_algorithm; /* name of algorithm */
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
+
+ unsigned int background_self_heal_count;
+ unsigned int background_self_heals_started;
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t data_change_log; /* on/off */
+ gf_boolean_t metadata_change_log; /* on/off */
+ gf_boolean_t entry_change_log; /* on/off */
+
+ int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+
+ gf_boolean_t inodelk_trace;
+ gf_boolean_t entrylk_trace;
+
+ gf_boolean_t strict_readdir;
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ uint64_t up_count; /* number of CHILD_UPs we have seen */
+ uint64_t down_count; /* number of CHILD_DOWNs we have seen */
+
+ struct _pump_private *pump_private; /* Set if we are loaded as pump */
+ int use_afr_in_pump;
+
+ pthread_mutex_t mutex;
+ struct list_head saved_fds; /* list of fds on which locks have succeeded */
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+
+ char vol_uuid[UUID_SIZE + 1];
+ int32_t *last_event;
+ afr_self_heald_t shd;
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t readdir_failover;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
+} afr_private_t;
- fd_t *healing_fd;
- int op_failed;
+typedef enum {
+ AFR_SELF_HEAL_NOT_ATTEMPTED,
+ AFR_SELF_HEAL_STARTED,
+ AFR_SELF_HEAL_FAILED,
+ AFR_SELF_HEAL_SYNC_BEGIN,
+} afr_self_heal_status;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
+typedef struct {
+ afr_self_heal_status gfid_or_missing_entry_self_heal;
+ afr_self_heal_status metadata_self_heal;
+ afr_self_heal_status data_self_heal;
+ afr_self_heal_status entry_self_heal;
+} afr_sh_status_for_all_type;
- loc_t parent_loc;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- call_frame_t *sh_frame;
-} afr_self_heal_t;
+typedef enum {
+ AFR_SELF_HEAL_ENTRY,
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1,
+} afr_self_heal_type;
+typedef enum {
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
+
+struct afr_self_heal_ {
+ /* External interface: These are variables (some optional) that
+ are set by whoever has triggered self-heal */
+
+ gf_boolean_t do_data_self_heal;
+ gf_boolean_t do_metadata_self_heal;
+ gf_boolean_t do_entry_self_heal;
+ gf_boolean_t do_gfid_self_heal;
+ gf_boolean_t do_missing_entry_self_heal;
+ gf_boolean_t force_confirm_spb; /* Check for split-brains even when
+ self-heal is turned off */
+
+ gf_boolean_t forced_merge; /* Is this a self-heal triggered to
+ forcibly merge the directories? */
+
+ gf_boolean_t background; /* do self-heal in background
+ if possible */
+ ia_type_t type; /* st_mode of the entry we're doing
+ self-heal on */
+ inode_t *inode; /* inode on which the self-heal is
+ performed on */
+ uuid_t sh_gfid_req; /* gfid self-heal needs to be done
+ with this gfid if it is not null */
+
+ /* Function to call to unwind. If self-heal is being done in the
+ background, this function will be called as soon as possible. */
+
+ int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed);
+
+ /* End of external interface members */
+
+
+ /* array of stat's, one for each child */
+ struct iatt *buf;
+ struct iatt *parentbufs;
+ struct iatt parentbuf;
+ struct iatt entrybuf;
+
+ afr_expunge_done_cbk_t expunge_done;
+ afr_impunge_done_cbk_t impunge_done;
+
+ /* array of xattr's, one for each child */
+ dict_t **xattr;
+
+ /* array containing if the lookups succeeded in the order of response
+ */
+ int32_t *success_children;
+ int success_count;
+ /* array containing the fresh children found in the self-heal process */
+ int32_t *fresh_children;
+ /* array containing the fresh children found in the parent lookup */
+ int32_t *fresh_parent_dirs;
+ /* array of errno's, one for each child */
+ int *child_errno;
+ /*loc used for lookup*/
+ loc_t lookup_loc;
+ int32_t lookup_flags;
+ afr_lookup_done_cbk_t lookup_done;
+
+ int32_t **pending_matrix;
+ int32_t **delta_matrix;
+
+ int32_t op_ret;
+ int32_t op_errno;
+
+ int *sources;
+ int source;
+ int active_source;
+ int active_sinks;
+ unsigned char *success;
+ unsigned char *locked_nodes;
+ int lock_count;
+
+ const char *linkname;
+ gf_boolean_t entries_skipped;
+
+ gf_boolean_t actual_sh_started;
+ gf_boolean_t sync_done;
+ gf_boolean_t data_lock_held;
+ gf_boolean_t sh_dom_lock_held;
+ gf_boolean_t eof_reached;
+ fd_t *healing_fd;
+ int file_has_holes;
+ blksize_t block_size;
+ off_t file_size;
+ off_t offset;
+ unsigned char *write_needed;
+ uint8_t *checksum;
+ afr_post_remove_call_t post_remove_call;
+
+ char *data_sh_info;
+ char *metadata_sh_info;
+
+ loc_t parent_loc;
+ call_frame_t *orig_frame;
+ call_frame_t *old_loop_frame;
+ gf_boolean_t unwound;
+
+ afr_sh_algo_private_t *private;
+ afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
+
+ struct afr_sh_algorithm *algo;
+ afr_lock_cbk_t data_lock_success_handler;
+ afr_lock_cbk_t data_lock_failure_handler;
+ gf_boolean_t data_lock_block;
+ int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
+ int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+
+ call_frame_t *sh_frame;
+};
+
+typedef struct afr_self_heal_ afr_self_heal_t;
typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
- AFR_FLUSH_TRANSACTION, /* flush */
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
} afr_transaction_type;
+typedef enum {
+ AFR_TRANSACTION_LK,
+ AFR_SELFHEAL_LK,
+} transaction_lk_type_t;
+
+typedef enum {
+ AFR_LOCK_OP,
+ AFR_UNLOCK_OP,
+} afr_lock_op_type_t;
+
+typedef enum {
+ AFR_DATA_SELF_HEAL_LK,
+ AFR_METADATA_SELF_HEAL_LK,
+ AFR_ENTRY_SELF_HEAL_LK,
+}selfheal_lk_type_t;
+
+typedef enum {
+ AFR_INODELK_TRANSACTION,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_ENTRYLK_TRANSACTION,
+ AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_INODELK_SELFHEAL,
+ AFR_INODELK_NB_SELFHEAL,
+ AFR_ENTRYLK_SELFHEAL,
+ AFR_ENTRYLK_NB_SELFHEAL,
+} afr_lock_call_type_t;
/*
xattr format: trusted.afr.volume = [x y z]
@@ -117,9 +370,8 @@ static inline int
afr_index_for_transaction_type (afr_transaction_type type)
{
switch (type) {
-
+
case AFR_DATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
return 0;
case AFR_METADATA_TRANSACTION:
@@ -133,352 +385,551 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
-typedef struct _afr_local {
- unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
+} afr_entry_lockee_t;
- unsigned int need_metadata_self_heal;
- unsigned int need_entry_self_heal;
- unsigned int need_data_self_heal;
- unsigned int govinda_gOvinda;
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
- unsigned int read_child_index;
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
- pid_t saved_pid;
+typedef struct {
+ loc_t *lk_loc;
- int32_t op_ret;
- int32_t op_errno;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
- int32_t **pending;
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
+ char lower_locked;
+ char higher_locked;
- loc_t loc;
- loc_t newloc;
+ unsigned char *locked_nodes;
+ unsigned char *lower_locked_nodes;
- fd_t *fd;
+ selfheal_lk_type_t selfheal_lk_type;
+ transaction_lk_type_t transaction_lk_type;
- glusterfs_fop_t fop;
+ int32_t lock_count;
+ int32_t entrylk_lock_count;
- unsigned char *child_up;
- int child_count;
+ uint64_t lock_number;
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
- int32_t *child_errno;
-
- dict_t *xattr_req;
- int open_fd_count;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
- */
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+} afr_internal_lock_t;
- int op;
- struct {
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
+typedef struct _afr_locked_fd {
+ fd_t *fd;
+ struct list_head list;
+} afr_locked_fd_t;
- struct {
- inode_t *inode;
- struct stat buf;
- dict_t *xattr;
- } lookup;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+};
- struct {
- int32_t flags;
- } open;
+typedef struct _afr_local {
+ int uid;
+ int gid;
+ unsigned int call_count;
+ unsigned int success_count;
+ unsigned int enoent_count;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- struct {
- int32_t cmd;
- struct flock flock;
- unsigned char *locked_nodes;
- } lk;
- struct {
- uint8_t *file_checksum;
- uint8_t *dir_checksum;
- } checksum;
+ unsigned int unhealable;
- /* inode read */
+ unsigned int read_child_index;
+ unsigned char read_child_returned;
+ unsigned int first_up_child;
- struct {
- int32_t mask;
- int last_tried; /* index of the child we tried previously */
- } access;
+ gf_lkowner_t saved_lk_owner;
- struct {
- int last_tried;
- ino_t ino;
- } stat;
+ int32_t op_ret;
+ int32_t op_errno;
- struct {
- int last_tried;
- ino_t ino;
- } fstat;
+ int32_t **pending;
- struct {
- size_t size;
- int last_tried;
- } readlink;
+ loc_t loc;
+ loc_t newloc;
- struct {
- const char *name;
- int last_tried;
- } getxattr;
+ fd_t *fd;
- struct {
- size_t size;
- off_t offset;
- int last_tried;
- } readv;
+ glusterfs_fop_t fop;
- /* dir read */
+ unsigned char *child_up;
+ int32_t *fresh_children; //in the order of response
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
- } opendir;
+ int32_t *child_errno;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
+ dict_t *xattr_req;
- int last_tried;
- } readdir;
+ int32_t inodelk_count;
+ int32_t entrylk_count;
- struct {
- int32_t op_ret;
- int32_t op_errno;
+ afr_internal_lock_t internal_lock;
- size_t size;
- off_t offset;
- int32_t flag;
+ afr_locked_fd_t *locked_fd;
+ int32_t source_child;
+ int32_t lock_recovery_child;
- int last_tried;
- } getdents;
+ dict_t *dict;
+ int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
- /* inode write */
- struct {
- ino_t ino;
- mode_t mode;
- struct stat buf;
- } chmod;
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
+
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+
+ int allow_sh_for_running_transaction;
+
+
+ /* This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
+
+ int op;
+ struct {
+ struct {
+ unsigned char buf_set;
+ struct statvfs buf;
+ } statfs;
+
+ struct {
+ uint32_t parent_entrylk;
+ uuid_t gfid_req;
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt postparent;
+ dict_t **xattrs;
+ dict_t *xattr;
+ struct iatt *postparents;
+ struct iatt *bufs;
+ int32_t read_child;
+ int32_t *sources;
+ int32_t *success_children;
+ int32_t **pending_matrix;
+ gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
+ } lookup;
+
+ struct {
+ int32_t flags;
+ } open;
+
+ struct {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ } lk;
+
+ /* inode read */
+
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
+
+ struct {
+ int last_index;
+ } stat;
+
+ struct {
+ int last_index;
+ } fstat;
+
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
+
+ struct {
+ char *name;
+ int last_index;
+ long xattr_len;
+ } getxattr;
+
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
+
+ /* dir read */
+
+ struct {
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+
+ uint32_t *checksum;
+ } opendir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ gf_boolean_t failed;
+ int last_index;
+ } readdir;
+ /* inode write */
+
+ struct {
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+
+ struct {
+ int32_t op_ret;
+
+ struct iovec *vector;
+ struct iobref *iobref;
+ int32_t count;
+ off_t offset;
+ uint32_t flags;
+ } writev;
+
+ struct {
+ off_t offset;
+ } truncate;
+
+ struct {
+ off_t offset;
+ } ftruncate;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } setattr;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } fsetattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
+ char *name;
+ } removexattr;
+
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
+ /* dir write */
+
+ struct {
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
+
+ struct {
+ dev_t dev;
+ mode_t mode;
+ dict_t *params;
+ } mknod;
+
+ struct {
+ int32_t mode;
+ dict_t *params;
+ } mkdir;
+
+ struct {
+ int flags;
+ } rmdir;
+
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
struct {
- ino_t ino;
- mode_t mode;
- struct stat buf;
- } fchmod;
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
struct {
- ino_t ino;
- uid_t uid;
- gid_t gid;
- struct stat buf;
- } chown;
+ off_t offset;
+ size_t len;
+ } discard;
- struct {
- ino_t ino;
- uid_t uid;
- gid_t gid;
- struct stat buf;
- } fchown;
-
- struct {
- ino_t ino;
- struct stat buf;
+ struct {
+ off_t offset;
+ size_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
- int32_t op_ret;
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- } writev;
+ } cont;
- struct {
- ino_t ino;
- off_t offset;
- struct stat buf;
- } truncate;
+ struct {
+ off_t start, len;
- struct {
- ino_t ino;
- off_t offset;
- struct stat buf;
- } ftruncate;
+ gf_boolean_t eager_lock_on;
+ int *eager_lock;
- struct {
- ino_t ino;
- struct timespec tv[2];
- struct stat buf;
- } utimens;
+ char *basename;
+ char *new_basename;
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
+ loc_t parent_loc;
+ loc_t new_parent_loc;
- struct {
- const char *name;
- } removexattr;
+ afr_transaction_type type;
- /* dir write */
-
- struct {
- ino_t ino;
- fd_t *fd;
- int32_t flags;
- mode_t mode;
- inode_t *inode;
- struct stat buf;
- } create;
+ /* pre-compute the post piggyback status before
+ entering POST-OP phase
+ */
+ int *postop_piggybacked;
- struct {
- ino_t ino;
- dev_t dev;
- mode_t mode;
- inode_t *inode;
- struct stat buf;
- } mknod;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- struct {
- ino_t ino;
- int32_t mode;
- inode_t *inode;
- struct stat buf;
- } mkdir;
+ struct list_head eager_locked;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- } unlink;
+ int32_t **txn_changelog;//changelog after pre+post ops
+ unsigned char *pre_op;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- } rmdir;
+ call_frame_t *main_frame;
- struct {
- ino_t ino;
- struct stat buf;
- } rename;
+ int (*fop) (call_frame_t *frame, xlator_t *this);
- struct {
- ino_t ino;
- inode_t *inode;
- struct stat buf;
- } link;
+ int (*done) (call_frame_t *frame, xlator_t *this);
- struct {
- ino_t ino;
- inode_t *inode;
- struct stat buf;
- char *linkpath;
- } symlink;
+ int (*resume) (call_frame_t *frame, xlator_t *this);
- struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
- } cont;
-
- struct {
- off_t start, len;
+ int (*unwind) (call_frame_t *frame, xlator_t *this);
- unsigned char *locked_nodes;
- int lock_count;
+ /* post-op hook */
+ } transaction;
- const char *basename;
- const char *new_basename;
+ afr_self_heal_t self_heal;
- loc_t parent_loc;
- loc_t new_parent_loc;
+ struct marker_str marker;
- afr_transaction_type type;
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
- int success_count;
- int erase_pending;
- int failure_count;
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
+} afr_local_t;
- int last_tried;
- int32_t *child_errno;
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
- call_frame_t *main_frame;
+typedef struct {
+ unsigned int *pre_op_done;
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+ unsigned int *pre_op_piggyback;
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
- int (*done) (call_frame_t *frame, xlator_t *this);
+ int flags;
+ uint64_t up_count; /* number of CHILD_UPs this fd has seen */
+ uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ int32_t last_tried;
- int (*unwind) (call_frame_t *frame, xlator_t *this);
- } transaction;
+ int hit, miss;
+ gf_boolean_t failed_over;
+ struct list_head entries; /* needed for readdir failover */
- afr_self_heal_t self_heal;
-} afr_local_t;
+ unsigned char *locked_on; /* which subvolumes locks have been successful */
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+ int call_child;
-typedef struct {
- unsigned char pre_op_done;
- unsigned char *child_failed;
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
} afr_fd_ctx_t;
/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = CALLOC (sizeof (type), 1); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
+ var = mem_get0 (THIS->local_pool); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
+ } while (0);
/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
+ ((op_errno == ENOTCONN) || \
+ (op_errno == EBADFD)))
#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
/* have we tried all children? */
#define all_tried(i, count) ((i) == (count) - 1)
+int32_t
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+pump_command_reply (call_frame_t *frame, xlator_t *this);
+
+int32_t
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode);
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child);
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
+
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd);
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes);
void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_up_children_count (int child_count, unsigned char *child_up);
+afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+
+
+loc_t *
+lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
+
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
+
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
+
+int
+afr_blocking_lock (call_frame_t *frame, xlator_t *this);
+
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
+
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
+
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children);
int
-afr_first_up_child (afr_private_t *priv);
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count);
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index);
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count);
+
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
int
-afr_deitransform (ino64_t ino, int child_count);
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
void
afr_local_cleanup (afr_local_t *local, xlator_t *this);
@@ -486,101 +937,276 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
int
afr_frame_return (call_frame_t *frame);
-#define AFR_STACK_UNWIND(frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- afr_local_cleanup (__local, __this); \
- free (__local); \
-} while (0);
-
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- free (__local); \
-} while (0);
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode);
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb);
+
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode);
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode);
+
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
+int
+afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
+
+#define AFR_STACK_UNWIND(fop, frame, params ...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0);
+
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
-static inline char *
-AFR_BASENAME (const char *str)
+static inline char *
+AFR_BASENAME (const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = strdup (str);
- __basename_str = strdup (basename (__tmp_str));
- FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup (str);
+ __basename_str = gf_strdup (basename (__tmp_str));
+ GF_FREE (__tmp_str);
+ return __basename_str;
}
-/* initialize local_t */
-static inline int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
-{
- local->child_up = CALLOC (sizeof (*local->child_up),
- priv->child_count);
- if (!local->child_up) {
- return -ENOMEM;
- }
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this);
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
+int32_t
+afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+int32_t *
+afr_children_create (int32_t child_count);
- local->call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (local->call_count == 0)
- return -ENOTCONN;
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
- local->transaction.erase_pending = 1;
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count);
- return 0;
-}
+int
+afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
+ int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid);
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid);
+
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index);
+
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child);
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *children, unsigned int child_count);
+void
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_children_rm_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *children, int32_t child_count);
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio);
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno);
+int
+afr_get_children_count (int32_t *children, unsigned int child_count);
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child);
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count);
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count);
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name);
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path);
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type);
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count);
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children);
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed));
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open);
-static inline int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
-{
- int i;
-
- local->child_errno = CALLOC (sizeof (*local->child_errno),
- priv->child_count);
- if (!local->child_errno) {
- return -ENOMEM;
- }
-
- local->pending = CALLOC (sizeof (*local->pending),
- priv->child_count);
-
- if (!local->pending) {
- return -ENOMEM;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = CALLOC (sizeof (*local->pending[i]),
- 3); /* data + metadata + entry */
- if (!local->pending[i])
- return -ENOMEM;
- }
-
- local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes),
- priv->child_count);
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this);
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count);
- local->transaction.child_errno = CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
- return 0;
-}
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal);
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal);
+
+void
+afr_set_low_priority (call_frame_t *frame);
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags);
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv);
+
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+gf_boolean_t
+afr_is_errno_set (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd);
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count);
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count);
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+/*
+ * Having this as a macro will make debugging a bit weirder, but does reduce
+ * the probability of functions handling this check inconsistently.
+ */
+#define QUORUM_CHECK(_func,_label) do { \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
+} while (0);
+
+
+#define AFR_SBRAIN_MSG "Failed on %s as split-brain is seen. Returning EIO."
+
+#define AFR_SBRAIN_CHECK_FD(fd, label) do { \
+ if (fd->inode && afr_is_split_brain (this, fd->inode)) { \
+ op_errno = EIO; \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG ,uuid_utoa (fd->inode->gfid)); \
+ goto label; \
+ } \
+} while (0)
+
+#define AFR_SBRAIN_CHECK_LOC(loc, label) do { \
+ if (loc->inode && afr_is_split_brain (this, loc->inode)) { \
+ op_errno = EIO; \
+ loc_path (loc, NULL); \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG , loc->path); \
+ goto label; \
+ } \
+} while (0)
+
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
new file mode 100644
index 000000000..a7f72fb30
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.c
@@ -0,0 +1,2641 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <fnmatch.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "afr-common.c"
+#include "defaults.c"
+#include "glusterfs.h"
+
+static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
+static int
+pump_mark_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ pump_priv->pump_start_pending = 1;
+
+ return 0;
+}
+
+static int
+is_pump_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ return (pump_priv->pump_start_pending);
+}
+
+static int
+pump_remove_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ pump_priv->pump_start_pending = 0;
+
+ return 0;
+}
+
+static pump_state_t
+pump_get_state ()
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t ret;
+
+ this = THIS;
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ ret = pump_priv->pump_state;
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ return ret;
+}
+
+int
+pump_change_state (xlator_t *this, pump_state_t state)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t state_old;
+ pump_state_t state_new;
+
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (pump_priv);
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ state_old = pump_priv->pump_state;
+ state_new = state;
+
+ pump_priv->pump_state = state;
+
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump changing state from %d to %d",
+ state_old,
+ state_new);
+
+ return 0;
+}
+
+static int
+pump_set_resume_path (xlator_t *this, const char *path)
+{
+ int ret = 0;
+
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (pump_priv);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ strncpy (pump_priv->resume_path, path, strlen (path) + 1);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return ret;
+}
+
+static int
+pump_save_path (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_state_t state;
+ dict_t *dict = NULL;
+ loc_t loc = {0};
+ int dict_ret = 0;
+ int ret = -1;
+
+ state = pump_get_state ();
+ if (state == PUMP_STATE_RESUME)
+ return 0;
+
+ priv = this->private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ dict = dict_new ();
+ dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s", path, PUMP_PATH);
+
+ ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setxattr failed - could not save path=%s", path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr succeeded - saved path=%s", path);
+ }
+
+ dict_unref (dict);
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_check_and_update_status (xlator_t *this)
+{
+ pump_state_t state;
+ int ret = -1;
+
+ state = pump_get_state ();
+
+ switch (state) {
+
+ case PUMP_STATE_RESUME:
+ case PUMP_STATE_RUNNING:
+ {
+ ret = 0;
+ break;
+ }
+ case PUMP_STATE_PAUSE:
+ {
+ ret = -1;
+ break;
+ }
+ case PUMP_STATE_ABORT:
+ {
+ pump_save_path (this, "/");
+ ret = -1;
+ break;
+ }
+ default:
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unknown pump state");
+ ret = -1;
+ break;
+ }
+
+ }
+
+ return ret;
+}
+
+static const char *
+pump_get_resume_path (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ const char *resume_path = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ resume_path = pump_priv->resume_path;
+
+ return resume_path;
+}
+
+static int
+pump_update_resume_state (xlator_t *this, const char *path)
+{
+ pump_state_t state;
+ const char *resume_path = NULL;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strcmp (resume_path, "/") == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path (/). Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else if (strcmp (resume_path, path) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path. Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not yet hit the resume path:res-path=%s,path=%s",
+ resume_path, path);
+ }
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+is_pump_traversal_allowed (xlator_t *this, const char *path)
+{
+ pump_state_t state;
+ const char *resume_path = NULL;
+ gf_boolean_t ret = _gf_true;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strstr (resume_path, path)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "On the right path to resumption path");
+ ret = _gf_true;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not the right path to resuming=> ignoring traverse");
+ ret = _gf_false;
+ }
+ }
+
+ return ret;
+}
+
+static int
+pump_save_file_stats (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped++;
+
+ strncpy (pump_priv->current_file, path,
+ PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return 0;
+}
+
+static int
+gf_pump_traverse_directory (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ loc_t entry_loc = {0};
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ dict_t *xattr_rsp = NULL;
+ int ret = 0;
+ gf_boolean_t is_directory_empty = _gf_true;
+ gf_boolean_t free_entries = _gf_false;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ fd = fd_create (loc->inode, pump_pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", loc->path);
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opendir failed on %s", loc->path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "pump opendir on %s returned=%d",
+ loc->path, ret);
+
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no more entries in directory");
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found readdir entry=%s", entry->d_name);
+
+ offset = entry->d_off;
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ loc->path, entry->d_name);
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret = afr_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret)
+ goto out;
+
+ if (!IS_ENTRY_CWD (entry->d_name) &&
+ !IS_ENTRY_PARENT (entry->d_name)) {
+
+ is_directory_empty = _gf_false;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ ret = syncop_lookup (this, &entry_loc, NULL,
+ &iatt, &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: lookup failed",
+ entry_loc.path);
+ continue;
+ }
+ pump_fill_loc_info (&entry_loc, &iatt,
+ &parent);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this, entry_loc.path);
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "entering dir=%s",
+ entry->d_name);
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ gf_log (this->name, GF_LOG_TRACE,
+ "offset incremented to %d",
+ (int32_t ) offset);
+
+ }
+
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
+ if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
+ "Nothing to be done.");
+ }
+
+out:
+ if (entry_loc.path)
+ loc_wipe (&entry_loc);
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+static int
+pump_update_resume_path (xlator_t *this)
+{
+ const char *resume_path = NULL;
+
+ resume_path = pump_get_resume_path (this);
+
+ if (resume_path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Found a path to resume from: %s",
+ resume_path);
+
+ }else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Did not find a path=> setting to '/'");
+ pump_set_resume_path (this, "/");
+ }
+
+ pump_change_state (this, PUMP_STATE_RESUME);
+
+ return 0;
+}
+
+static int32_t
+pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ loc_t loc = {0};
+ int i = 0;
+ int ret = 0;
+ int source = 0;
+ int sink = 1;
+
+ priv = this->private;
+
+ afr_build_root_loc (this, &loc);
+
+ ret = syncop_removexattr (priv->children[source], &loc,
+ PUMP_PATH);
+
+ ret = syncop_removexattr (priv->children[sink], &loc,
+ PUMP_SINK_COMPLETE);
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = syncop_removexattr (priv->children[i], &loc,
+ PUMP_SOURCE_COMPLETE);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG, "removexattr "
+ "failed with %s", strerror (errno));
+ }
+
+ loc_wipe (&loc);
+ return pump_command_reply (frame, this);
+}
+
+static int
+pump_complete_migration (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ dict_t *dict = NULL;
+ pump_state_t state;
+ loc_t loc = {0};
+ int dict_ret = 0;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ dict = dict_new ();
+
+ state = pump_get_state ();
+ if (state == PUMP_STATE_RUNNING) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump finished pumping");
+
+ pump_priv->pump_finished = _gf_true;
+
+ dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SOURCE_COMPLETE);
+
+ ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying source complete");
+ }
+ dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SINK_COMPLETE);
+
+ ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying sink complete");
+ }
+
+ pump_save_path (this, "/");
+
+ } else if (state == PUMP_STATE_ABORT) {
+ gf_log (this->name, GF_LOG_DEBUG, "Starting cleanup "
+ "of pump internal xattrs");
+ call_resume (pump_priv->cleaner);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_lookup_sink (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp;
+ dict_t *xattr_req = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ xattr_req = dict_new ();
+
+ ret = afr_set_root_gfid (xattr_req);
+ if (ret)
+ goto out;
+
+ ret = syncop_lookup (PUMP_SINK_CHILD (this), loc,
+ xattr_req, &iatt, &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lookup on sink child failed");
+ goto out;
+ }
+
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return ret;
+}
+
+static int
+pump_task (void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+
+
+ loc_t loc = {0};
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_req = NULL;
+
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ afr_set_root_gfid (xattr_req);
+ ret = syncop_lookup (this, &loc, xattr_req,
+ &iatt, &xattr_rsp, &parent);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "lookup: path=%s gfid=%s",
+ loc.path, uuid_utoa (loc.inode->gfid));
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ goto out;
+ }
+
+ pump_update_resume_path (this);
+
+ afr_set_root_gfid (xattr_req);
+ ret = pump_lookup_sink (&loc);
+ if (ret) {
+ pump_update_resume_path (this);
+ goto out;
+ }
+
+ gf_pump_traverse_directory (&loc);
+
+ pump_complete_migration (this);
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+
+static int
+pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+
+ this = THIS;
+
+ priv = this->private;
+
+ inode_unref (priv->root_inode);
+ STACK_DESTROY (sync_frame->root);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump xlator exiting");
+ return 0;
+}
+
+int
+pump_start (call_frame_t *pump_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
+ pump_pid = (uint64_t) (unsigned long)pump_frame->root;
+
+ ret = synctask_new (pump_priv->env, pump_task,
+ pump_task_completion,
+ pump_frame, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "starting pump failed");
+ pump_change_state (this, PUMP_STATE_ABORT);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
+
+ priv->use_afr_in_pump = 1;
+out:
+ return ret;
+}
+
+static int
+pump_start_synctask (xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ pump_change_state (this, PUMP_STATE_RUNNING);
+
+ ret = pump_start (frame, this);
+
+out:
+ return ret;
+}
+
+int32_t
+pump_cmd_start_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+
+{
+ call_frame_t *prev = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not initiate destination "
+ "brick connect");
+ ret = op_ret;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Successfully initiated destination "
+ "brick connect");
+
+ pump_mark_start_pending (this);
+
+ /* send the PARENT_UP as pump is ready now */
+ prev = cookie;
+ if (prev && prev->this)
+ prev->this->notify (prev->this, GF_EVENT_PARENT_UP, this);
+
+out:
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+
+ return 0;
+}
+
+static int
+pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *dict = NULL;
+ data_t *data = NULL;
+ char *clnt_cmd = NULL;
+ loc_t loc = {0};
+
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
+ if (!data) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get destination brick value");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ clnt_cmd = GF_CALLOC (1, data->len+1, gf_common_mt_char);
+ if (!clnt_cmd) {
+ ret = -1;
+ goto out;
+ }
+
+ memcpy (clnt_cmd, data->data, data->len);
+ clnt_cmd[data->len] = '\0';
+ gf_log (this->name, GF_LOG_DEBUG, "Got destination brick %s\n",
+ clnt_cmd);
+
+ ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, clnt_cmd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not inititiate destination brick "
+ "connect");
+ goto out;
+ }
+
+ STACK_WIND (frame,
+ pump_cmd_start_setxattr_cbk,
+ PUMP_SINK_CHILD(this),
+ PUMP_SINK_CHILD(this)->fops->setxattr,
+ &loc,
+ dict,
+ 0, NULL);
+
+ ret = 0;
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (data)
+ data_unref (data);
+
+ if (ret && clnt_cmd)
+ GF_FREE (clnt_cmd);
+
+ loc_wipe (&loc);
+ return ret;
+}
+
+static int
+is_pump_aborted (xlator_t *this)
+{
+ pump_state_t state;
+
+ state = pump_get_state ();
+
+ return ((state == PUMP_STATE_ABORT));
+}
+
+int32_t
+pump_cmd_start_getxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ char *path = NULL;
+
+ pump_state_t state;
+ int ret = 0;
+ int need_unwind = 0;
+ int dict_ret = -1;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr failed - changing pump "
+ "state to RUNNING with '/'");
+ path = "/";
+ ret = op_ret;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "getxattr succeeded");
+
+ dict_ret = dict_get_str (dict, PUMP_PATH, &path);
+ if (dict_ret < 0)
+ path = "/";
+ }
+
+ state = pump_get_state ();
+ if ((state == PUMP_STATE_RUNNING) ||
+ (state == PUMP_STATE_RESUME)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pump is already started");
+ ret = -1;
+ goto out;
+ }
+
+ pump_set_resume_path (this, path);
+
+ if (is_pump_aborted (this))
+ /* We're re-starting pump afresh */
+ ret = pump_initiate_sink_connect (frame, this);
+ else {
+ /* We're re-starting pump from a previous
+ pause */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "about to start synctask");
+ ret = pump_start_synctask (this);
+ need_unwind = 1;
+ }
+
+out:
+ if ((ret < 0) || (need_unwind == 1)) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+ return 0;
+}
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ uint64_t number_files = 0;
+
+ char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
+ char *dict_str = NULL;
+
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ number_files = pump_priv->number_files_pumped;
+ strncpy (filename, pump_priv->current_file, PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
+ if (!dict_str) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (pump_priv->pump_finished) {
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
+ } else {
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
+ }
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
+
+ dict = dict_new ();
+
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_dynstr returned negative value");
+ } else {
+ dict_str = NULL;
+ }
+
+ op_ret = 0;
+
+out:
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref (dict);
+
+ GF_FREE (dict_str);
+
+ return 0;
+}
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ pump_change_state (this, PUMP_STATE_PAUSE);
+
+ local->op_ret = 0;
+ pump_command_reply (frame, this);
+
+ return 0;
+}
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = 0;
+ loc_t loc = {0};
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!priv->root_inode) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pump xlator cannot be started without an initial "
+ "lookup");
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ STACK_WIND (frame,
+ pump_cmd_start_getxattr_cbk,
+ PUMP_SOURCE_CHILD(this),
+ PUMP_SOURCE_CHILD(this)->fops->getxattr,
+ &loc,
+ PUMP_PATH, NULL);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_cleanup_helper (void *data) {
+ call_frame_t *frame = data;
+
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
+
+ return 0;
+}
+
+static int
+pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ STACK_DESTROY (sync_frame->root);
+
+ return 0;
+}
+
+int
+pump_execute_commit (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *sync_frame = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+ local = frame->local;
+
+ local->op_ret = 0;
+ if (pump_priv->pump_finished) {
+ pump_change_state (this, PUMP_STATE_COMMIT);
+ sync_frame = create_frame (this, this->ctx->pool);
+ ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+ pump_cleanup_done, sync_frame, frame);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
+ "synctask for cleaning up xattrs.");
+ }
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Commit can't proceed. "
+ "Migration in progress");
+ local->op_ret = -1;
+ local->op_errno = EINPROGRESS;
+ pump_command_reply (frame, this);
+ }
+
+ return 0;
+}
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *sync_frame = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+ local = frame->local;
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped = 0;
+ pump_priv->current_file[0] = '\0';
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ local->op_ret = 0;
+ if (pump_priv->pump_finished) {
+ sync_frame = create_frame (this, this->ctx->pool);
+ ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+ pump_cleanup_done, sync_frame, frame);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
+ "synctask for cleaning up xattrs.");
+ }
+
+ } else {
+ pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
+ pump_xattr_cleaner,
+ 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump status command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - status");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump pause command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - pause");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_commit (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump commit command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - commit");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump abort command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - abort");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump start command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - start");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+struct _xattr_key {
+ char *key;
+ struct list_head list;
+};
+
+static int
+__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ struct list_head * list = data;
+ struct _xattr_key * xkey = NULL;
+
+ if (!strncmp (key, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
+
+ xkey->key = key;
+ INIT_LIST_HEAD (&xkey->list);
+
+ list_add_tail (&xkey->list, list);
+ }
+ return 0;
+}
+
+static void
+__filter_xattrs (dict_t *dict)
+{
+ struct list_head keys;
+
+ struct _xattr_key *key;
+ struct _xattr_key *tmp;
+
+ INIT_LIST_HEAD (&keys);
+
+ dict_foreach (dict, __gather_xattr_keys,
+ (void *) &keys);
+
+ list_for_each_entry_safe (key, tmp, &keys, list) {
+ dict_del (dict, key->key);
+
+ list_del_init (&key->list);
+
+ GF_FREE (key);
+ }
+}
+
+int32_t
+pump_getxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name, NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+ }
+
+ return 0;
+}
+
+int32_t
+pump_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
+
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_getxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ if (name) {
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit pump command - status");
+ pump_execute_status (frame, this);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ loc_copy (&local->loc, loc);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->getxattr,
+ loc, name, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (setxattr, main_frame,
+ local->op_ret, local->op_errno, NULL);
+ }
+ return 0;
+}
+
+static int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc,
+ local->cont.setxattr.dict,
+ local->cont.setxattr.flags, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int32_t
+pump_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "Command failed");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "Command succeeded");
+
+ AFR_STACK_UNWIND (setxattr,
+ frame,
+ local->op_ret,
+ local->op_errno, NULL);
+
+ return 0;
+}
+
+int
+pump_parse_command (call_frame_t *frame, xlator_t *this,
+ afr_local_t *local, dict_t *dict)
+{
+
+ int ret = -1;
+
+ if (pump_command_start (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_start (frame, this);
+
+ } else if (pump_command_pause (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_pause (frame, this);
+
+ } else if (pump_command_abort (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_abort (frame, this);
+
+ } else if (pump_command_commit (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_commit (frame, this);
+ }
+ return ret;
+}
+
+int
+pump_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
+ op_errno, out);
+
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_setxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+ }
+
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ afr_local_cleanup (local, this);
+ goto out;
+ }
+
+ ret = pump_parse_command (frame, this,
+ local, dict);
+ if (ret >= 0) {
+ ret = 0;
+ goto out;
+ }
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ ret = -1;
+ afr_local_cleanup (local, this);
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+
+ local->transaction.fop = afr_setxattr_wind;
+ local->transaction.done = afr_setxattr_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* Defaults */
+static int32_t
+pump_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xattr_req);
+ return 0;
+ }
+
+ afr_lookup (frame, this, loc, xattr_req);
+ return 0;
+}
+
+
+static int32_t
+pump_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ loc,
+ offset, xdata);
+ return 0;
+ }
+
+ afr_truncate (frame, this, loc, offset, xdata);
+ return 0;
+}
+
+
+static int32_t
+pump_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd,
+ offset, xdata);
+ return 0;
+ }
+
+ afr_ftruncate (frame, this, fd, offset, xdata);
+ return 0;
+}
+
+
+
+
+int
+pump_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+ }
+ afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
+ return 0;
+
+}
+
+
+
+int
+pump_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+ }
+ afr_mkdir (frame, this, loc, mode, umask, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_unlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+ }
+ afr_unlink (frame, this, loc, xflag, xdata);
+ return 0;
+
+}
+
+
+static int
+pump_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+ }
+
+ afr_rmdir (frame, this, loc, flags, xdata);
+ return 0;
+
+}
+
+
+
+int
+pump_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+ }
+ afr_symlink (frame, this, linkpath, loc, umask, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+ afr_rename (frame, this, oldloc, newloc, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_link_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+ afr_link (frame, this, oldloc, newloc, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+ }
+ afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+ }
+ afr_open (frame, this, loc, flags, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd,
+ vector,
+ count,
+ off, flags,
+ iobref, xdata);
+ return 0;
+ }
+
+ afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
+ return 0;
+}
+
+
+static int32_t
+pump_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_flush_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
+ }
+ afr_flush (frame, this, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+ afr_fsync (frame, this, fd, flags, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+ }
+ afr_opendir (frame, this, loc, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+ afr_fsyncdir (frame, this, fd, flags, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ loc,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+ afr_xattrop (frame, this, loc, flags, dict, xdata);
+ return 0;
+
+}
+
+static int32_t
+pump_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+ afr_fxattrop (frame, this, fd, flags, dict, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+ name, op_errno, out);
+
+ op_errno = 0;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc,
+ name, xdata);
+ return 0;
+ }
+ afr_removexattr (frame, this, loc, name, xdata);
+
+ out:
+ if (op_errno)
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+
+}
+
+
+
+static int32_t
+pump_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t off, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
+ return 0;
+ }
+ afr_readdir (frame, this, fd, size, off, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
+ return 0;
+ }
+ afr_readdirp (frame, this, fd, size, off, dict);
+ return 0;
+
+}
+
+
+
+static int32_t
+pump_releasedir (xlator_t *this,
+ fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_releasedir (this, fd);
+ return 0;
+
+}
+
+static int32_t
+pump_release (xlator_t *this,
+ fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_release (this, fd);
+ return 0;
+
+}
+
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_forget (this, inode);
+
+ return 0;
+}
+
+static int32_t
+pump_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_setattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ }
+ afr_setattr (frame, this, loc, stbuf, valid, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsetattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+ }
+ afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
+ return 0;
+
+}
+
+
+/* End of defaults */
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+is_xlator_pump_sink (xlator_t *child)
+{
+ return (child == PUMP_SINK_CHILD(THIS));
+}
+
+static int
+is_xlator_pump_source (xlator_t *child)
+{
+ return (child == PUMP_SOURCE_CHILD(THIS));
+}
+
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
+{
+ int ret = -1;
+ xlator_t *child_xl = NULL;
+
+ child_xl = (xlator_t *) data;
+
+ ret = afr_notify (this, event, data, NULL);
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ if (is_xlator_pump_source (child_xl))
+ pump_change_state (this, PUMP_STATE_ABORT);
+ break;
+
+ case GF_EVENT_CHILD_UP:
+ if (is_xlator_pump_sink (child_xl))
+ if (is_pump_start_pending (this)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "about to start synctask");
+ ret = pump_start_synctask (this);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not start pump "
+ "synctask");
+ else
+ pump_remove_start_pending (this);
+ }
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ int child_count = 0;
+ xlator_list_t * trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+
+ int source_child = 0;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pump translator needs a source and sink"
+ "subvolumes defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
+
+ child_count = xlator_subvolume_count (this);
+ if (child_count != 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "There should be exactly 2 children - one source "
+ "and one sink");
+ return -1;
+ }
+ priv->child_count = child_count;
+
+ priv->read_child = source_child;
+ priv->favorite_child = source_child;
+ priv->background_self_heal_count = 0;
+
+ priv->data_self_heal = "on";
+ priv->metadata_self_heal = 1;
+ priv->entry_self_heal = 1;
+
+ priv->data_self_heal_window_size = 16;
+
+ priv->data_change_log = 1;
+ priv->metadata_change_log = 1;
+ priv->entry_change_log = 1;
+ priv->use_afr_in_pump = 1;
+ priv->sh_readdir_size = 65536;
+
+ /* Locking options */
+
+ /* Lock server count infact does not matter. Locks are held
+ on all subvolumes, in this case being the source
+ and the sink.
+ */
+
+ priv->strict_readdir = _gf_false;
+
+ priv->wait_count = 1;
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed to set pending key");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
+ gf_afr_mt_pump_priv);
+ if (!pump_priv) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK_INIT (&pump_priv->resume_path_lock);
+ LOCK_INIT (&pump_priv->pump_state_lock);
+
+ pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
+ gf_afr_mt_char);
+ if (!pump_priv->resume_path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ pump_priv->env = this->ctx->env;
+ if (!pump_priv->env) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not create new sync-environment");
+ ret = -1;
+ goto out;
+ }
+
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ priv->pump_private = pump_priv;
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+fini (xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ if (!priv)
+ goto out;
+
+ pump_priv = priv->pump_private;
+ if (!pump_priv)
+ goto afr_priv;
+
+ GF_FREE (pump_priv->resume_path);
+ LOCK_DESTROY (&pump_priv->resume_path_lock);
+ LOCK_DESTROY (&pump_priv->pump_state_lock);
+ GF_FREE (pump_priv);
+afr_priv:
+ afr_priv_destroy (priv);
+out:
+ return 0;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = pump_lookup,
+ .open = pump_open,
+ .flush = pump_flush,
+ .fsync = pump_fsync,
+ .fsyncdir = pump_fsyncdir,
+ .xattrop = pump_xattrop,
+ .fxattrop = pump_fxattrop,
+ .getxattr = pump_getxattr,
+
+ /* inode write */
+ .writev = pump_writev,
+ .truncate = pump_truncate,
+ .ftruncate = pump_ftruncate,
+ .setxattr = pump_setxattr,
+ .setattr = pump_setattr,
+ .fsetattr = pump_fsetattr,
+ .removexattr = pump_removexattr,
+
+ /* dir read */
+ .opendir = pump_opendir,
+ .readdir = pump_readdir,
+ .readdirp = pump_readdirp,
+
+ /* dir write */
+ .create = pump_create,
+ .mknod = pump_mknod,
+ .mkdir = pump_mkdir,
+ .unlink = pump_unlink,
+ .rmdir = pump_rmdir,
+ .link = pump_link,
+ .symlink = pump_symlink,
+ .rename = pump_rename,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = afr_priv_dump,
+};
+
+
+struct xlator_cbks cbks = {
+ .release = pump_release,
+ .releasedir = pump_releasedir,
+ .forget = pump_forget,
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
new file mode 100644
index 000000000..bc4c31a78
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.h
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __PUMP_H__
+#define __PUMP_H__
+
+#include "syncop.h"
+
+/* FIXME: Needs to be defined in a common file */
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+
+#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
+#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
+
+#define PUMP_PATH "trusted.glusterfs.pump-path"
+
+#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
+#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
+
+typedef enum {
+ PUMP_STATE_RUNNING, /* Pump is running and migrating files */
+ PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
+ PUMP_STATE_PAUSE, /* Pump is paused */
+ PUMP_STATE_ABORT, /* Pump is aborted */
+ PUMP_STATE_COMMIT, /* Pump is commited */
+} pump_state_t;
+
+typedef struct _pump_private {
+ struct syncenv *env; /* The env pointer to the pump synctask */
+ char *resume_path; /* path to resume from the last pause */
+ gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
+ gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
+ pump_state_t pump_state; /* State of pump */
+ char current_file[PATH_MAX]; /* Current file being pumped */
+ uint64_t number_files_pumped; /* Number of files pumped */
+ gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
+ char pump_start_pending; /* Boolean to mark start pending until
+ CHILD_UP */
+ call_stub_t *cleaner;
+} pump_private_t;
+
+void
+build_root_loc (inode_t *inode, loc_t *loc);
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this);
+
+#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index f87212699..174bea841 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,30 +1,38 @@
-xlator_LTLIBRARIES = dht.la nufa.la
+xlator_LTLIBRARIES = dht.la nufa.la switch.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c
-
-dht_la_SOURCES = $(dht_common_source) dht.c
+dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
+switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-common.c
+switch_la_LDFLAGS = -module -avoid-version
+switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = dht-common.h dht-mem-types.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
-CLEANFILES =
+CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/distribute.so
install-data-hook:
- ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so \ No newline at end of file
+ ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 62f3822a5..8f61339e6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -27,10 +18,87 @@
#include "glusterfs.h"
#include "xlator.h"
+#include "libxlator.h"
#include "dht-common.h"
#include "defaults.h"
+#include "byte-order.h"
+#include "glusterfs-acl.h"
#include <sys/time.h>
+#include <libgen.h>
+
+int
+dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+ data_t *dict_data = NULL;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("dht", GF_LOG_WARNING,
+ "memory allocation failed");
+ return -1;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("dht", GF_LOG_WARNING,
+ "dht aggregate dict set failed");
+ GF_FREE (size);
+ return -1;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
+ return -1;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime (THIS, dst, key, value);
+ if (ret < 0)
+ return ret;
+ } else {
+ /* compare user xattrs only */
+ if (!strncmp (key, "user.", strlen ("user."))) {
+ ret = dict_lookup (dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal (dict_data, value);
+ if (!ret)
+ gf_log ("dht", GF_LOG_DEBUG,
+ "xattr mismatch for %s", key);
+ }
+ }
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+ return 0;
+}
+
+
+void
+dht_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, dht_aggregate, dst);
+out:
+ return;
+}
/* TODO:
- use volumename in xattr instead of "dht"
@@ -42,100 +110,369 @@
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno)
+ xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
- local = frame->local;
- ret = op_ret;
-
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = inode_ctx_put (local->inode, this,
- (uint64_t)(long)layout);
-
- if (ret == 0)
- local->selfheal.layout = NULL;
-
- if (local->st_ino) {
- local->stbuf.st_ino = local->st_ino;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvolume for %s",
- local->loc.path);
- }
- }
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr);
+ local = frame->local;
+ ret = op_ret;
- return 0;
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set (this, local->inode, layout);
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+
+ DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
+
+out:
+ return ret;
+}
+
+
+int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
+
+ if (!main_frame)
+ return 0;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset (this, local->cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)", local->loc.path,
+ (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+ if ((ret > 0) && (ret == conf->subvolume_cnt)) {
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (local->inode)
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return ret;
+}
+
+
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
+
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (is_dir) {
+ local->dir_count ++;
+ } else {
+ local->file_count ++;
+
+ if (!is_linkfile) {
+ /* real file */
+ local->cached_subvol = prev->this;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
+
+ local->op_ret = 0;
+
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt) || attempt_unwind) {
+ dht_discover_complete (this, frame);
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->link_xattr_name);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ uuid_copy (local->gfid, loc->gfid);
+
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
- int is_dir = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- conf = this->private;
local = frame->local;
prev = cookie;
- layout = local->layout;
+ layout = local->layout;
+
+ if (!op_ret && uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
LOCK (&frame->lock);
{
/* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ local->stbuf->st_mode
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
- if (op_ret == -1) {
- local->op_errno = ENOENT;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
- goto unlock;
- }
+ goto unlock;
+ }
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (!is_dir) {
gf_log (this->name, GF_LOG_DEBUG,
"lookup of %s on %s returned non dir 0%o",
local->loc.path, prev->this->name,
- stbuf->st_mode);
+ stbuf->ia_type);
local->need_selfheal = 1;
- goto unlock;
+ goto unlock;
}
- local->op_ret = 0;
- if (local->xattr == NULL)
- local->xattr = dict_ref (xattr);
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
-
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
- if (prev->this == local->hashed_subvol)
- local->st_ino = local->stbuf.st_ino;
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
}
unlock:
UNLOCK (&frame->lock);
@@ -150,176 +487,270 @@ unlock:
return 0;
}
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
+ if (local->op_ret == 0) {
+ ret = dht_layout_normalize (this, &local->loc, layout);
- local->layout = NULL;
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fixing assignment on %s",
+ local->loc.path);
+ goto selfheal;
+ }
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
-
- inode_ctx_put (local->inode, this,
- (uint64_t)(long)layout);
-
- if (local->st_ino) {
- local->stbuf.st_ino = local->st_ino;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvol for %s",
- local->loc.path);
- }
- }
+ dht_layout_set (this, local->inode, layout);
+ }
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
}
- return 0;
+ return 0;
selfheal:
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
-
- return 0;
+ FRAME_SU_DO (frame, dht_local_t);
+ uuid_copy (local->loc.gfid, local->gfid);
+ ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+out:
+ return ret;
}
int
dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, err);
local = frame->local;
prev = cookie;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
LOCK (&frame->lock);
{
- if (op_ret == -1) {
- local->op_errno = op_errno;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- if ((op_errno != ENOTCONN)
+ if ((op_errno != ENOTCONN)
&& (op_errno != ENOENT)
&& (op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO,
+ "subvolume %s for %s returned -1 (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
}
-
if (op_errno == ESTALE) {
- /* propogate the ESTALE to parent.
- * setting local->layout_mismatch would send
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
* ESTALE to parent. */
- local->layout_mismatch = 1;
+ local->return_estale = 1;
}
- goto unlock;
- }
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG (local->loc.inode->ia_type)) {
+ local->need_lookup_everywhere = 1;
+ }
+ }
+ goto unlock;
+ }
- if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "mismatching filetypes 0%o v/s 0%o for %s",
- (stbuf->st_mode & S_IFMT),
- (local->inode->st_mode & S_IFMT),
- local->loc.path);
+ if (stbuf->ia_type != local->inode->ia_type) {
+ gf_log (this->name, GF_LOG_INFO,
+ "mismatching filetypes 0%o v/s 0%o for %s",
+ (stbuf->ia_type), (local->inode->ia_type),
+ local->loc.path);
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
- goto unlock;
- }
+ goto unlock;
+ }
- layout = dht_layout_get (this, inode);
-
- is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
-
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile found in revalidate for %s",
- local->loc.path);
- local->layout_mismatch = 1;
-
- goto unlock;
- }
+ layout = local->layout;
- if (is_dir) {
- ret = dht_layout_dir_mismatch (this, layout,
- prev->this, &local->loc,
- xattr);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "mismatching layouts for %s",
- local->loc.path);
-
- local->layout_mismatch = 1;
-
- goto unlock;
- }
- }
-
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
-
- local->op_ret = 0;
- local->stbuf.st_ino = local->st_ino;
-
- if (!local->xattr)
- local->xattr = dict_ref (xattr);
- }
-unlock:
- UNLOCK (&frame->lock);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_INFO,
+ "linkfile found in revalidate for %s",
+ local->loc.path);
+ local->return_estale = 1;
+
+ goto unlock;
+ }
+
+ if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch (this, layout,
+ prev->this, &local->loc,
+ xattr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "mismatching layouts for %s",
+ local->loc.path);
+
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+ }
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_ref (xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- if (!S_ISDIR (local->stbuf.st_mode)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.st_mode |= S_ISVTX;
- }
-
+ if (!IA_ISDIR (local->stbuf.ia_type)
+ && (local->hashed_subvol != local->cached_subvol)
+ && (local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
if (local->layout_mismatch) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
-
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
- }
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
- return 0;
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no more
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ dht_lookup_everywhere (frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
}
int
dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- ret = dht_layout_inode_set (this, local->cached_subvol, inode);
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
+
+ ret = dht_layout_preset (this, local->cached_subvol, inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"failed to set layout for subvolume %s",
@@ -329,65 +760,232 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
goto unwind;
}
- local->op_ret = 0;
- if ((local->stbuf.st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.st_mode |= S_ISVTX;
- }
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
- return 0;
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+out:
+ return ret;
+}
+
+
+int
+dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ if (local->dir_count) {
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
+
+ if (!cached_subvol) {
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ if (local->need_lookup_everywhere) {
+ if (uuid_compare (local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol (this, cached_subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ ret = dht_layout_set (this, local->inode, layout);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to set layout for subvol %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_INFO,
+ "cannot create linkfile file for %s on %s: "
+ "hashed subvolume cannot be found.",
+ local->loc.path, cached_subvol->name);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ ret = dht_layout_preset (frame->this, cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set layout for subvol %s",
+ cached_subvol ? cached_subvol->name :
+ "<nil>");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linking file %s existing on %s to %s (hash)",
+ local->loc.path, cached_subvol->name,
+ hashed_subvol->name);
+
+ ret = dht_linkfile_create (frame,
+ dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+
+ return ret;
+}
+
+
+int
+dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int this_call_cnt = 0;
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
+
+ return 0;
}
int
dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- int ret = -1;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
- conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
- local = frame->local;
- loc = &local->loc;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ goto unlock;
+ }
- prev = cookie;
- subvol = prev->this;
+ if (uuid_is_null (local->gfid))
+ uuid_copy (local->gfid, buf->ia_gfid);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+ if (uuid_compare (local->gfid, buf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid differs on subvolume %s",
+ loc->path, prev->this->name);
+ }
- is_linkfile = check_is_linkfile (inode, buf, xattr);
- is_dir = check_is_dir (inode, buf, xattr);
-
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
- }
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, buf, xattr);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol (this, inode, buf,
+ xattr);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s linkfile %s (-> %s)",
+ subvol->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ }
+
+ /* non linkfile GFID takes precedence */
+ uuid_copy (local->gfid, buf->ia_gfid);
if (is_dir) {
local->dir_count++;
@@ -400,189 +998,183 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->cached_subvol) {
/* found one file */
- dht_stat_merge (this, &local->stbuf, buf,
+ dht_iatt_merge (this, &local->stbuf, buf,
subvol);
local->xattr = dict_ref (xattr);
local->cached_subvol = subvol;
gf_log (this->name, GF_LOG_DEBUG,
"found on %s file %s",
subvol->name, loc->path);
+
+ dht_iatt_merge (this, &local->postparent,
+ postparent, subvol);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
+ /* This is where we need 'rename' both entries logic */
+ gf_log (this->name, GF_LOG_WARNING,
"multiple subvolumes (%s and %s) have "
- "file %s", local->cached_subvol->name,
+ "file %s (preferably rename the file "
+ "in the backend, and do a fresh lookup)",
+ local->cached_subvol->name,
subvol->name, local->loc.path);
}
}
- }
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_DEBUG,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- dht_linkfile_unlink (frame, this, subvol, loc);
- }
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
-
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- loc->path);
- DHT_STACK_UNWIND (frame, -1, EIO, NULL, NULL, NULL);
- return 0;
- }
-
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
-
- if (!cached_subvol) {
- DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
- return 0;
- }
-
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot create linkfile file for %s on %s: "
- "hashed subvolume cannot be found.",
- loc->path, cached_subvol->name);
-
- local->op_ret = 0;
- local->op_errno = 0;
-
- ret = dht_layout_inode_set (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvol %s",
- cached_subvol ? cached_subvol->name :
- "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ UNLOCK (&frame->lock);
- DHT_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr);
+ if (is_linkfile) {
+ ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+ /* Delete the linkfile only if there are no open fds on it.
+ if there is a open-fd, it may be in migration */
+ if (!ret && (fd_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc, 0, NULL);
return 0;
}
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "linking file %s existing on %s to %s (hash)",
- loc->path, cached_subvol->name,
- hashed_subvol->name);
-
- dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
- cached_subvol, hashed_subvol, loc);
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
- return 0;
+out:
+ return ret;
}
int
dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- conf = this->private;
- local = frame->local;
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", loc, out);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ conf = this->private;
+ local = frame->local;
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
+ if (!local->inode)
+ local->inode = inode_ref (loc->inode);
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_everywhere_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
int
dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
+ subvol = prev->this;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s (following linkfile) failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- goto err;
- }
+ gf_log (this->name, GF_LOG_INFO,
+ "lookup of %s on %s (following linkfile) failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached dir",
local->loc.path, subvol->name);
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached link",
local->loc.path, subvol->name);
goto err;
}
- if ((stbuf->st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- stbuf->st_mode |= S_ISVTX;
- }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on data file on %s",
+ local->loc.path, subvol->name);
+ goto err;
+ }
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ if ((stbuf->ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
-out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
+unwind:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
return 0;
err:
dht_lookup_everywhere (frame, this, loc);
-
+out:
return 0;
}
@@ -594,21 +1186,38 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
int i = 0;
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->call_cnt = call_cnt;
-
+
local->layout = dht_layout_new (this, conf->subvolume_cnt);
if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- DHT_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref (local->xattr);
+ local->xattr = NULL;
}
-
+
+ if (!uuid_is_null (local->gfid)) {
+ ret = dict_set_static_bin (local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid", local->loc.path);
+ }
+
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
conf->subvolumes[i],
@@ -616,23 +1225,35 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
&local->loc, local->xattr_req);
}
return 0;
+unwind:
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
+
}
int
dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_layout_t *layout = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- call_frame_t *prev = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+ dht_layout_t *parent_layout = NULL;
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
conf = this->private;
@@ -640,76 +1261,140 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (!op_ret && uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol"
+ " %s", loc->path, prev->this->name);
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
+ (loc->parent)) {
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
+ if (parent_layout->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+ }
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
- }
- }
+ if (op_ret == 0) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+ }
+ }
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
dht_lookup_directory (frame, this, &local->loc);
return 0;
- }
-
- if (op_ret == -1)
+ }
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Lookup of %s for subvolume"
+ " %s failed with error %s", loc->path, prev->this->name,
+ strerror (op_errno));
goto out;
+ }
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
- if (!is_dir && !is_linkfile) {
+ if (!is_linkfile) {
/* non-directory and not a linkfile */
- dht_itransform (this, prev->this, stbuf->st_ino,
- &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
- goto out;
- }
-
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "could not set pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
}
+ goto out;
+ }
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
return 0;
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
+ * two members should have values equal to sum of corresponding values
+ * from each of the subvolume. See dht_iatt_merge for reference.
+ */
+
+ if (!op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -717,56 +1402,68 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ loc_t new_loc = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
+ if (!conf)
+ goto err;
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
-
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
+ &hashed_subvol);
+ if (ret) {
+ loc_wipe (&local->loc);
+ ret = loc_dup (&new_loc, &local->loc);
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ /* we no more need 'new_loc' entries */
+ loc_wipe (&new_loc);
- if (is_revalidate (loc)) {
- layout = dht_layout_get (this, loc->inode);
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
+
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate (loc)) {
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -775,67 +1472,93 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_TRACE,
- "incomplete layout failure for path=%s",
- loc->path);
- op_errno = ESTALE;
- goto err;
- }
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "incomplete layout failure for path=%s",
+ loc->path);
- local->inode = inode_ref (loc->inode);
- local->st_ino = loc->inode->ino;
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- for (i = 0; i < layout->cnt; i++) {
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ local->cached_subvol = NULL;
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+
+ if (IA_ISDIR (local->inode->ia_type)) {
+ local->call_cnt = call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_revalidate_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ call_cnt = local->call_cnt = layout->cnt;
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
+ for (i = 0; i < call_cnt; i++) {
subvol = layout->list[i].xlator;
-
+
STACK_WIND (frame, dht_revalidate_cbk,
subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ &local->loc, local->xattr_req);
- if (!--call_cnt)
- break;
}
} else {
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ do_fresh_lookup:
+ /* TODO: remove the hard-coding */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
+ conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
}
STACK_WIND (frame, dht_lookup_cbk,
@@ -846,1313 +1569,1660 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct stat *stbuf)
+dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
-
- if (local->inode)
- local->stbuf.st_ino = local->inode->ino;
- local->op_ret = 0;
- }
+ local->op_ret = 0;
+
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *cached_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
+ prev = cookie;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ if (local->op_ret == -1)
+ goto err;
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
+ cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s",
+ local->loc.path);
+ local->op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc);
- }
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink,
+ &local->loc, local->flags, NULL);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
+ DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
+ NULL, NULL, NULL);
+ return 0;
}
-
int
-dht_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd);
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- return 0;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ return 0;
+}
- return 0;
+static void
+fill_layout_info (dht_layout_t *layout, char *buf)
+{
+ int i = 0;
+ char tmp_buf[128] = {0,};
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf (tmp_buf, 128, "(%s %u %u)",
+ layout->list[i].xlator->name,
+ layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat (buf, " ");
+ strcat (buf, tmp_buf);
+ }
}
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
+{
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
+}
int
-dht_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Subvolume %s returned -1 (%s)", this->name,
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->alloc_len += strlen(value);
- layout = dht_layout_get (this, loc->inode);
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ ret = 0;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ out:
+ return ret;
+}
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->chmod,
- loc, mode);
- }
+int
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
- return 0;
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
- return 0;
-}
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown local->xsel (%s)", local->xsel);
+ goto out;
+ }
+
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ GF_FREE (local->xattr_val);
+
+ out:
+ return ret;
+}
int
-dht_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid)
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr err (%s) for dir",
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ }
+ unlock:
+ UNLOCK (&frame->lock);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ if (!is_last_call (this_call_cnt))
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->chown,
- loc, uid, gid);
- }
+ /* -- last call: do patch ups -- */
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- return 0;
-}
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
int
-dht_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode)
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
+ "(%s)", prev->this->name, strerror (op_errno));
+ goto unwind;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ goto unwind;
+ }
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fchmod,
- fd, mode);
- }
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
- return 0;
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
-
int
-dht_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid)
+dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to set linkinfo");
+ }
+ }
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ return 0;
+}
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+int
+dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
+ local = frame->local;
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
+ this_call_cnt = dht_frame_return (frame);
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fchown,
- fd, uid, gid);
- }
+ if (!xattr || (op_ret == -1))
+ goto out;
- return 0;
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
+ }
+ local->op_ret = 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref (xattr, NULL);
+ } else {
+ /* first aggregate everything into xattr and then copy into
+ * local->xattr. This is required as we want to have
+ * 'local->xattr' as the proper final dictionary passed above
+ * distribute xlator.
+ */
+ dht_aggregate_xattr (xattr, local->xattr);
+ local->xattr = dict_copy (xattr, local->xattr);
+ }
+out:
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
+ }
+ return 0;
+}
- return 0;
+int32_t
+dht_getxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
-dht_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2])
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret != -1) {
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+ local->xattr_req = dict_ref (xdata);
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, local->xattr_req);
}
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ return 0;
+}
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->utimens,
- loc, tv);
- }
- return 0;
+int
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ local = frame->local;
+ layout = local->layout;
+
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+dht_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t **sub_volumes = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->truncate,
- loc, offset);
-
- return 0;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
- return 0;
-}
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
-int
-dht_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+ if (key && DHT_IS_DIR(layout) &&
+ ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
+ }
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) {
+ cached_subvol = local->cached_subvol;
+ (void) strncpy (local->xsel, key, 256);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, NULL);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->inode = inode_ref (fd->inode);
- local->call_cnt = 1;
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset);
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
+ }
+ if (hashed_subvol) {
+ STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, NULL);
+ return 0;
+ }
+ op_errno = ENODATA;
+ goto err;
+ }
- return 0;
+ if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
+ cnt = layout->cnt;
+ } else {
+ cnt = 1;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ sub_volumes = alloca ( cnt * sizeof (xlator_t *));
+ for (i = 0; i < cnt; i++)
+ *(sub_volumes + i) = layout->list[i].xlator;
- return 0;
-}
+ if (cluster_getmarkerattr (frame, this, loc, key,
+ local, dht_getxattr_unwind,
+ sub_volumes, cnt,
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ conf->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ return 0;
+ }
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ if (key && *conf->vol_uuid) {
+ if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
+ (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
+ cnt = layout->cnt;
+ } else {
+ cnt = 1;
+ }
+ sub_volumes = alloca ( cnt * sizeof (xlator_t *));
+ for (i = 0; i < cnt; i++)
+ sub_volumes[i] = layout->list[i].xlator;
+
+ if (cluster_getmarkerattr (frame, this, loc, key,
+ local, dht_getxattr_unwind,
+ sub_volumes, cnt,
+ MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
+ conf->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ return 0;
+ }
+ }
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (DHT_IS_DIR(layout)) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
-
+#undef DHT_IS_DIR
int
-dht_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
- local->call_cnt = 1;
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->access,
- loc, mask);
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY) != 0))) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- return 0;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path)
+dht_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, path);
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
- return 0;
-}
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+ conf = this->private;
-int
-dht_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->call_cnt = 1;
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size);
+ STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
+ fd, xattr, flags, NULL);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+static int
+dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr);
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret == -1)
+ goto out;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key);
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
- return 0;
+ if (!strcmp (value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev->this)
+ conf->decommissioned_bricks[i] = prev->this;
+ }
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
+ return 0;
- return 0;
}
-
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {0,};
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
- local->call_cnt = 1;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, flags);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ local->call_cnt = call_cnt = layout->cnt;
- return 0;
-}
+ tmp = dict_get (xattr, "distribute.migrate-data");
+ if (tmp) {
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy (value, tmp->data, tmp->len);
+ if (strcmp (value, "force") == 0)
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
-int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+ local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ if (!local->rebalance.target_node) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->rebalance.from_subvol = local->cached_subvol;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_start_rebalance_task (this, frame);
+ if (!ret)
+ return 0;
- local->call_cnt = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to create a new synctask",
+ loc->path);
+ }
+ op_errno = EINVAL;
+ goto err;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->removexattr,
- loc, key);
+ }
- return 0;
+ tmp = dict_get (xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid (loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
+ local->key = gf_strdup (value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0 ; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND (frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
+ }
+ return 0;
+ }
+
+ tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fixing the layout of %s", loc->path);
+
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+
+ tmp = dict_get (xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
+ ret = gf_string2uint32 (value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
+ (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_err_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setxattr,
+ loc, xattr, flags, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- local->op_ret = 0;
- }
+ local->op_ret = 0;
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
return 0;
}
int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd)
+dht_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int ret = -1;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
- local->call_cnt = 1;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_fd_cbk,
- subvol, subvol->fops->open,
- loc, flags, fd);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
- return 0;
-}
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->removexattr,
+ loc, key, NULL);
+ }
+ return 0;
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct stat *stbuf,
- struct iobref *iobref)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
-
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off);
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
- return 0;
+ VALIDATE_OR_GOTO (frame, err);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
-}
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
-int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct stat *stbuf)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
- return 0;
+ return 0;
+}
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+/*
+ * dht_normalize_stats -
+ */
+static void
+dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
+{
+ double factor = 0;
- return 0;
-}
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double) buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
+ }
+}
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = 1;
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->flush, fd);
+ }
+unlock:
+ UNLOCK (&frame->lock);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
- return 0;
+ return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- local->call_cnt = 1;
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->fsync,
- fd, datasync);
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ local->call_cnt = conf->subvolume_cnt;
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_statfs_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
+ }
+ return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
-}
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_statfs_cbk,
+ subvol, subvol->fops->statfs, loc, xdata);
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct flock *flock)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct flock *flock)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_lk_cbk,
- subvol, subvol->fops->lk,
- fd, cmd, flock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ conf = this->private;
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
- local = frame->local;
+ goto err;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
-
- /* TODO: normalize sizes */
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
-
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
+ local->call_cnt = conf->subvolume_cnt;
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fd_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, fd, xdata);
+ }
+ return 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = 0;
+ int ret = 0;
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+ conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret < 0)
+ goto done;
- conf = this->private;
+ if (!local->layout)
+ local->layout = dht_layout_get (this, local->fd->inode);
- local = dht_local_init (frame);
- local->call_cnt = conf->subvolume_cnt;
+ layout = local->layout;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
- }
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ next_offset = orig_entry->d_off;
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
+ (prev->this != local->first_up_subvol)) {
+ continue;
+ }
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
+ continue;
+ }
- return 0;
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ goto unwind;
+ }
- return 0;
-}
+ /* Do this if conf->search_unhashed is set to "auto" */
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ subvol = dht_layout_search (this, layout,
+ orig_entry->d_name);
+ if (!subvol || (subvol != prev->this)) {
+ /* TODO: Count the number of entries which need
+ linkfile to prove its existence in fs */
+ layout->search_unhashed++;
+ }
+ }
+ dht_itransform (this, prev->this, orig_entry->d_off,
+ &entry->d_off);
-int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- int i = -1;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to link the layout in inode");
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode, this,
+ &entry->d_stat, 1);
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (prev->this != dht_last_up_subvol (this))
+ op_errno = 0;
- conf = this->private;
+done:
+ if (count == 0) {
+ /* non-zero next_offset means that
+ EOF is not yet hit on the current subvol
+ */
+ if (next_offset == 0) {
+ next_subvol = dht_subvol_next (this, prev->this);
+ } else {
+ next_subvol = prev->this;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
- local->call_cnt = conf->subvolume_cnt;
+ STACK_WIND (frame, dht_readdirp_cbk,
+ next_subvol, next_subvol->fops->readdirp,
+ local->fd, local->size, next_offset,
+ local->xattr);
+ return 0;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd);
- }
+unwind:
+ if (op_ret < 0)
+ op_ret = 0;
- return 0;
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ gf_dirent_free (&entries);
- return 0;
+ return 0;
}
+
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
off_t next_offset = 0;
- int count = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *subvol = 0;
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
+ if (op_ret < 0)
+ goto done;
- if (op_ret < 0)
- goto done;
+ if (!local->layout)
+ local->layout = dht_layout_get (this, local->fd->inode);
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ layout = local->layout;
+
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
- if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL)
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
- continue;
- }
+ subvol = dht_layout_search (this, layout, orig_entry->d_name);
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto unwind;
- }
+ if (!subvol || (subvol == prev->this)) {
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto unwind;
+ }
- dht_itransform (this, prev->this, orig_entry->d_ino,
- &entry->d_ino);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
+ dht_itransform (this, prev->this, orig_entry->d_off,
+ &entry->d_off);
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
- list_add_tail (&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+ }
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (prev->this != dht_last_up_subvol (this))
+ op_errno = 0;
done:
- if (count == 0) {
+ if (count == 0) {
/* non-zero next_offset means that
EOF is not yet hit on the current subvol
*/
@@ -2162,771 +3232,911 @@ done:
next_subvol = prev->this;
}
- if (!next_subvol) {
- goto unwind;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
- return 0;
- }
+ STACK_WIND (frame, dht_readdir_cbk,
+ next_subvol, next_subvol->fops->readdir,
+ local->fd, local->size, next_offset, NULL);
+ return 0;
+ }
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ if (op_ret < 0)
+ op_ret = 0;
- DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ gf_dirent_free (&entries);
return 0;
}
int
-dht_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t yoff)
+dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop, dict_t *dict)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
int op_errno = -1;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
-
+ xlator_t *xvol = NULL;
+ off_t xoff = 0;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ conf = this->private;
- local->fd = fd_ref (fd);
- local->size = size;
+ local = dht_local_init (frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol (this);
+
+ dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref (dict);
+ else
+ local->xattr = dict_new ();
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set '%s' key",
+ conf->link_xattr_name);
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+ }
- /* TODO: do proper readdir */
- STACK_WIND (frame, dht_readdir_cbk,
- xvol, xvol->fops->readdir,
- fd, size, xoff);
+ STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
+ fd, size, xoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff, local->xattr);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
+dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata)
+{
+ int op = GF_FOP_READDIR;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ op = GF_FOP_READDIRP;
+ break;
+ }
+ }
+
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
+
+out:
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
+ return 0;
+}
+
+int
+dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *dict)
+{
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+ return 0;
+}
+
+
+
+int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
- if (op_ret == 0)
- local->op_ret = 0;
- }
- UNLOCK (&frame->lock);
+ if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ local->fd = fd_ref (fd);
+ local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fsyncdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ if (op_ret == -1)
+ goto out;
- prev = cookie;
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
- layout = dht_layout_for_subvol (this, prev->this);
+ prev = cookie;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (local->loc.parent) {
- ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set inode context");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ ret = dht_layout_preset (this, prev, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvolume %s",
+ prev? prev->name: NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
- return 0;
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ * correct values. since, preparent and postparent buffers correspond
+ * to a directory these two members should have values equal to sum of
+ * corresponding values from each of the subvolume.
+ * See dht_iatt_merge for reference.
+ */
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ return 0;
}
int
dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
if (op_ret == -1)
goto err;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
return 0;
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ dht_local_t *local = NULL;
- conf = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
dht_get_du_info (frame, this, loc);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
+ /* Choose the minimum filled volume, and create the
files there */
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local->params = dict_ref (params);
local->cached_subvol = avail_subvol;
- local->mode = mode;
+ local->mode = mode;
local->rdev = rdev;
-
- dht_linkfile_create (frame,
+ local->umask = umask;
+ dht_linkfile_create (frame,
dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
} else {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
}
}
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (dht_filter_loc_subvol_key (this, loc, &local->loc,
+ &cached_subvol)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlinking %s on %s (given path %s)",
+ local->loc.path, cached_subvol->name, loc->path);
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink,
+ &local->loc, xflag, xdata);
+ goto done;
+ }
- local->call_cnt = 1;
- if (hashed_subvol != cached_subvol)
- local->call_cnt++;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
- STACK_WIND (frame, dht_err_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ goto err;
+ }
- if (hashed_subvol != cached_subvol)
- STACK_WIND (frame, dht_err_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->flags = xflag;
+ if (hashed_subvol != cached_subvol) {
+ STACK_WIND (frame, dht_unlink_linkfile_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink, loc,
+ xflag, xdata);
+ } else {
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
+ }
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
prev = cookie;
- local = frame->local;
+
+ local = frame->local;
if (op_ret == -1)
goto out;
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- stbuf->st_ino = local->loc.inode->ino;
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- return 0;
+ return 0;
}
int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
-
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- STACK_WIND (frame, dht_link_cbk,
- srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
+ &local->loc, &local->loc2, xdata);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- return 0;
+ return 0;
}
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto err;
+ }
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- if (hashed_subvol != cached_subvol) {
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
- }
+ hashed_subvol = dht_subvol_get_hashed (this, newloc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (hashed_subvol != cached_subvol) {
+ uuid_copy (local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
+ cached_subvol, hashed_subvol, newloc);
+ } else {
+ STACK_WIND (frame, dht_link_cbk,
+ cached_subvol, cached_subvol->fops->link,
+ oldloc, newloc, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ if (op_ret == -1)
+ goto out;
- prev = cookie;
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
- layout = dht_layout_for_subvol (this, prev->this);
+ prev = cookie;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set inode context");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set preset layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
- return 0;
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
+ postparent, NULL);
+ return 0;
}
int
dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
if (op_ret == -1)
goto err;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
+ &local->loc, local->flags, local->mode,
+ local->umask, local->fd, local->params);
return 0;
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- int ret = -1;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ if (dht_filter_loc_subvol_key (this, loc, &local->loc,
+ &subvol)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "creating %s on %s (got create on %s)",
+ local->loc.path, subvol->name, loc->path);
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ &local->loc, flags, mode, umask, fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
- } else {
- /* Choose the minimum filled volume, and create the
- files there */
- /* TODO */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->fd = fd_ref (fd);
- local->flags = flags;
- local->mode = mode;
-
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, fd);
-
- }
+ loc, flags, mode, umask, fd, params);
+ goto done;
}
-
- return 0;
+ /* Choose the minimum filled volume, and create the
+ files there */
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+ goto done;
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ local = frame->local;
+ layout = local->selfheal.layout;
- local = frame->local;
- layout = local->selfheal.layout;
+ if (op_ret == 0) {
+ dht_layout_set (this, local->inode, layout);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
- if (op_ret == 0) {
- inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
- local->selfheal.layout = NULL;
- local->stbuf.st_ino = local->st_ino;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
- DHT_STACK_UNWIND (frame, op_ret, op_errno,
- local->inode, &local->stbuf);
+ DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ local->inode, &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
- return 0;
+ return 0;
}
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- int subvol_filled = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
- conf = this->private;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
subvol_filled = dht_is_subvol_filled (this, prev->this);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (subvol_filled && (op_ret != -1)) {
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST)
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- local->layout = NULL;
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
+ layout);
+ }
return 0;
}
int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ if (uuid_is_null (local->loc.gfid) && !op_ret)
+ uuid_copy (local->loc.gfid, stbuf->ia_gfid);
if (dht_is_subvol_filled (this, hashed_subvol))
ret = dht_layout_merge (this, layout, prev->this,
@@ -2934,47 +4144,55 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
else
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
- local->op_ret = 0;
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
- local->st_ino = local->stbuf.st_ino;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+ local->op_ret = 0;
- local->call_cnt = conf->subvolume_cnt - 1;
-
- if (local->call_cnt == 0) {
- local->layout = NULL;
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
- }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode);
- }
- return 0;
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+ if (local->call_cnt == 0) {
+ dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+ &local->loc, layout);
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND (frame, dht_mkdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
+ }
+ return 0;
err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
-int
+
+ int
dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int ret = -1;
- xlator_t *hashed_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -2982,722 +4200,1013 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- hashed_subvol = dht_subvol_get_hashed (this, loc);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (hashed_subvol == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hashed subvol not found for %s",
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (hashed_subvol == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "hashed subvol not found for %s",
loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ op_errno = EINVAL;
+ goto err;
+ }
- local->hashed_subvol = hashed_subvol;
- local->inode = inode_ref (loc->inode);
- ret = loc_copy (&local->loc, loc);
- local->mode = mode;
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ local->params = dict_ref (params);
+ local->inode = inode_ref (loc->inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode);
+ STACK_WIND (frame, dht_mkdir_hashed_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
int
dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- local->layout = NULL;
+ local = frame->local;
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- return 0;
+ return 0;
}
int
-dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- uint64_t tmp_layout = 0;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
-
- if (op_errno != ENOENT)
- local->need_selfheal = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
- }
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- inode_ctx_get (local->loc.inode, this,
- &tmp_layout);
- layout = (dht_layout_t *)(long)tmp_layout;
-
- /* TODO: neater interface needed below */
- local->stbuf.st_mode = local->loc.inode->st_mode;
-
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, layout);
- } else {
- DHT_STACK_UNWIND (frame, local->op_ret,
- local->op_errno);
- }
- }
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
return 0;
}
int
-dht_rmdir_do (call_frame_t *frame, xlator_t *this)
+dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
-
- if (local->op_ret == -1)
- goto err;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int done = 0;
- local->call_cnt = conf->subvolume_cnt;
+ local = frame->local;
+ prev = cookie;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc);
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- return 0;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
-err:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
- return 0;
-}
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+ /* Track if rmdir succeeded on atleast one subvol*/
+ local->fop_succeeded = 1;
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
-int
-dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ this_call_cnt = dht_frame_return (frame);
- this_call_cnt = dht_frame_return (frame);
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
- return 0;
-}
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
-int
-dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ local->need_selfheal = 0;
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
+ if (local->loc.parent) {
- STACK_WIND (frame, dht_rmdir_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
- return 0;
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
-err:
- this_call_cnt = dht_frame_return (frame);
+ }
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
- return 0;
+ return 0;
}
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_rmdir_do (call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ VALIDATE_OR_GOTO (this->private, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ conf = this->private;
+ local = frame->local;
- conf = this->private;
+ if (local->op_ret == -1)
+ goto err;
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local->call_cnt = conf->subvolume_cnt;
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
+ "subvol for %s",local->loc.path);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
+ continue;
- return 0;
+ STACK_WIND (frame, dht_rmdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ return 0;
- return 0;
+err:
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
}
int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
+
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinked linkfile %s on %s",
+ local->loc.path, src->name);
+ } else {
+ main_local->op_ret = -1;
+ main_local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlink of %s on %s failed (%s)",
+ local->loc.path, src->name, strerror (op_errno));
+ }
+
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
}
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ if (op_ret != 0)
+ goto err;
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict);
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
+ main_local->op_ret = -1;
+ main_local->op_errno = ENOTEMPTY;
- return 0;
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s on %s found to be not a linkfile (type=0%o)",
+ local->loc.path, src->name, stbuf->ia_type);
+ goto err;
+ }
+ STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
+ src, src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
}
int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entries, xlator_t *src)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
+ call_frame_t *lookup_frame = NULL;
+ dht_local_t *lookup_local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ local = frame->local;
-int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ list_for_each_entry (trav, &entries->list, list) {
+ if (strcmp (trav->d_name, ".") == 0)
+ continue;
+ if (strcmp (trav->d_name, "..") == 0)
+ continue;
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
+ ret++;
+ continue;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ /* this entry is either a directory which is neither "." nor "..",
+ or a non directory which is not a linkfile. the directory is to
+ be treated as non-empty
+ */
+ return 0;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict);
+ list_for_each_entry (trav, &entries->list, list) {
+ if (strcmp (trav->d_name, ".") == 0)
+ continue;
+ if (strcmp (trav->d_name, "..") == 0)
+ continue;
- return 0;
+ lookup_frame = NULL;
+ lookup_local = NULL;
+ lookup_frame = copy_frame (frame);
+ if (!lookup_frame) {
+ /* out of memory, let the rmdir fail
+ (as non-empty, unfortunately) */
+ goto err;
+ }
+
+ lookup_local = mem_get0 (this->local_pool);
+ if (!lookup_local) {
+ goto err;
+ }
+
+ lookup_frame->local = lookup_local;
+ lookup_local->main_frame = frame;
+
+ build_ret = dht_build_child_loc (this, &lookup_local->loc,
+ &local->loc, trav->d_name);
+ if (build_ret != 0)
+ goto err;
+
+ uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s",
+ lookup_local->loc.path, src->name);
+
+ LOCK (&frame->lock);
+ {
+ local->call_cnt++;
+ }
+ UNLOCK (&frame->lock);
+
+ STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup,
+ &lookup_local->loc, xattrs);
+ ret++;
+ }
+
+ if (xattrs)
+ dict_unref (xattrs);
+
+ return ret;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (xattrs)
+ dict_unref (xattrs);
- return 0;
+ DHT_STACK_DESTROY (lookup_frame);
+ return 0;
}
int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
+
+ if (op_ret > 2) {
+ ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
+
+ switch (ret) {
+ case 0: /* non linkfiles exist */
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ break;
+ default:
+ /* @ret number of linkfiles are getting unlinked */
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s found %d linkfiles",
+ prev->this->name, local->loc.path, ret);
+ break;
+ }
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
+
+ return 0;
}
-int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+int
+dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ dict = dict_new ();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ local->loc.path, conf->link_xattr_name);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ STACK_WIND (frame, dht_rmdir_readdirp_cbk,
+ prev->this, prev->this->fops->readdirp,
+ local->fd, 4096, 0, dict);
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
-
- return 0;
-}
-
+ this_call_cnt = dht_frame_return (frame);
-int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ return 0;
}
int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
- return 0;
+ local->flags = flags;
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, local->fd, NULL);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
+ NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ local->call_cnt = 1;
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ STACK_WIND (frame, dht_entrylk_cbk,
+ subvol, subvol->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
int
dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ STACK_WIND (frame, dht_fentrylk_cbk,
+ subvol, subvol->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
- dht_layout_t *layout = NULL;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
- inode_ctx_get (inode, this, &tmp_layout);
+ inode_ctx_del (inode, this, &ctx_int);
- if (!tmp_layout)
- return 0;
+ if (!ctx_int)
+ return 0;
- layout = (dht_layout_t *)(long)tmp_layout;
- if (!layout->preset)
- FREE (layout);
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
- return 0;
-}
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref (this, layout);
+ GF_FREE (ctx);
+ return 0;
+}
int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
-
- conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *));
- if (!conf->subvolumes) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
+ conf = this->private;
+ if (!conf)
+ return ret;
+
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
}
- conf->subvolume_cnt = cnt;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ subvol = data;
- conf->subvolume_status = CALLOC (cnt, sizeof (char));
- if (!conf->subvolume_status) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
+ conf->gen++;
- return 0;
-}
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ subvol->name);
+ break;
+ }
-int
-dht_notify (xlator_t *this, int event, void *data, ...)
-{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
+ gettimeofday (&time, NULL);
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = time.tv_sec;
+ }
+ UNLOCK (&conf->subvolume_lock);
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol (this, cnt);
- conf = this->private;
+ break;
- switch (event) {
- case GF_EVENT_CHILD_UP:
- subvol = data;
+ case GF_EVENT_CHILD_MODIFIED:
+ subvol = data;
- conf->gen++;
+ conf->gen++;
+ propagate = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- }
- UNLOCK (&conf->subvolume_lock);
+ if (conf->assert_no_child_down) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
+ }
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- break;
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ subvol->name);
+ break;
+ }
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK (&conf->subvolume_lock);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- }
- UNLOCK (&conf->subvolume_lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- break;
- }
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_CONNECTING bad subvolume %s",
+ subvol->name);
+ break;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
+ default:
+ propagate = 1;
+ break;
+ }
+
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
- ret = default_notify (this, event, data);
+ }
+
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
- return ret;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+
+ /* rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ */
+ if (conf->defrag) {
+ ret = gf_thread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+
+ return ret;
}
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index f5c95b4cb..5ccd66799 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,223 +13,775 @@
#include "config.h"
#endif
+#include <regex.h>
+
+#include "dht-mem-types.h"
+#include "libxlator.h"
+#include "syncop.h"
+
#ifndef _DHT_H
#define _DHT_H
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+
+#include <fnmatch.h>
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
+ int ret);
struct dht_layout {
- int cnt;
- int preset;
- int gen;
- int type;
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ int gen;
+ int type;
+ int ref; /* use with dht_conf_t->layout_lock */
+ int search_unhashed;
struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
+ } list[];
};
-typedef struct dht_layout dht_layout_t;
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+};
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
+
+
+typedef enum {
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
+} dht_hashfn_type_t;
+
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+};
struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
- struct stat stbuf;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t st_ino;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
+ int call_cnt;
+ loc_t loc;
+ loc_t loc2;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ /* Use stbuf as the postbuf, when we require both
+ * pre and post attrs */
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ char need_selfheal;
int file_count;
int dir_count;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct stat stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t missing;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_layout_t *layout;
- } selfheal;
-
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
+ call_frame_t *main_frame;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_layout_t *layout;
+ } selfheal;
+ uint32_t uid;
+ uint32_t gid;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ char *newpath;
+
+ /* gfid related */
+ uuid_t gfid;
+
+ /*Marker Related*/
+ struct marker_str marker;
+
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+
+ glusterfs_fop_t fop;
+
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
double avail_percent;
+ double avail_inodes;
uint64_t avail_space;
uint32_t log;
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ gf_defrag_pattern_list_t *defrag_pattern;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
- gf_lock_t subvolume_lock;
+ gf_lock_t subvolume_lock;
int subvolume_cnt;
xlator_t **subvolumes;
- xlator_t *local_volume; /* Needed by NUFA */
- char *subvolume_status;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- dht_layout_t *default_dir_layout;
- gf_boolean_t search_unhashed;
- int gen;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ gf_boolean_t search_unhashed;
+ int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ struct timeval last_stat_fetch;
+ gf_lock_t layout_lock;
+ void *private; /* Can be used by wrapper xlators over
+ dht */
+ gf_boolean_t use_readdirp;
+ char vol_uuid[UUID_SIZE + 1];
+ gf_boolean_t assert_no_child_down;
+ time_t *subvol_up_time;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /* to keep track of nodes which are decomissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *wild_xattr_name;
};
typedef struct dht_conf dht_conf_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
-
-#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
+
+#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
-#define DHT_LINKFILE_MODE (S_ISVTX)
-#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE)
+#define DHT_MIGRATION_IN_PROGRESS 1
+#define DHT_MIGRATION_COMPLETED 2
+
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
+#define check_is_linkfile(i,s,x,n) ( \
+ ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
+ == DHT_LINKFILE_MODE) && \
+ dict_get (x, n))
+
+#define IS_DHT_MIGRATION_PHASE2(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
+ ~S_IFMT) == DHT_LINKFILE_MODE))
-#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode))
+#define IS_DHT_MIGRATION_PHASE1(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
+
+#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
+
+#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
-#define DHT_STACK_UNWIND(frame, params ...) do { \
- dht_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- dht_local_wipe (__local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__local); \
- } while (0)
-
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ int32_t sec = 0; \
+ sec = new_sec; \
+ LOCK (&inode->lock); \
+ { \
+ new_sec = max(new_sec, ctx_sec); \
+ if (sec < new_sec) \
+ new_nsec = ctx_nsec; \
+ if (sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+ const char *name);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p, uint32_t *no_space_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct stat *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+ struct iatt *buf, dict_t *xattr);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
+ int op_ret, int op_errno, dict_t *xattr);
int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t *disk_layout);
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len);
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
+ uint64_t *x);
-void dht_local_wipe (dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame);
-int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from,
- xlator_t *subvol);
+void dht_local_wipe (xlator_t *this, dht_local_t *local);
+dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
+ xlator_t *subvol);
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
+int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
+ dht_layout_t *layout);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+
+int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
+void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
+dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
+xlator_t *dht_first_up_subvol (xlator_t *this);
+xlator_t *dht_last_up_subvol (xlator_t *this);
+
+int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+
+int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
+
+int dht_rename_cleanup (call_frame_t *frame);
+int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+
+int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* migration/rebalance */
+int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+
+int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
+int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+
+
+/* FOPS */
+int32_t dht_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t dht_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t dht_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t dht_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata);
+
+int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t dht_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t dht_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t dht_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t dht_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t dht_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t dht_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t dht_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *dict);
+
+int32_t dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_forget (xlator_t *this, inode_t *inode);
+int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
+int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+
+/* definitions for nufa/switch */
+int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+
+void*
+gf_defrag_start (void *this);
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
-int dht_layout_inode_set (xlator_t *this, xlator_t *subvol, inode_t *inode);
-xlator_t *dht_first_up_subvol (xlator_t *this);
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
-#endif /* _DHT_H */
+#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 468e88798..fe3955ecb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -33,52 +24,70 @@
#include <sys/time.h>
-int
+int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ call_frame_t *prev = NULL;
int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- uint64_t bytes = 0;
-
- local = frame->local;
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1)
- goto out;
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bfree * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bfree * statvfs->f_bsize);
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64"",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space);
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
- out:
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get disk info from %s", prev->this->name);
+ goto out;
+ }
+
+ if (statvfs && statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ }
+
+ if (statvfs && statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /* set percent inodes to 100 for dynamically allocated inode filesystems
+ this logic holds good so that, distribute has nothing to worry about
+ total inodes rather let the 'create()' to be scheduled on the hashed
+ subvol regardless of the total inodes. since we have no awareness on
+ loosing inodes this logic fits well
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev->this == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "on subvolume '%s': avail_percent is: "
+ "%.2f and avail_space is: %"PRIu64" "
+ "and avail_inodes is: %.2f",
+ prev->this->name,
+ conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
@@ -87,177 +96,319 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
-
- return 0;
- err:
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame (this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init (statfs_frame, NULL, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs,
+ &tmp_loc, NULL);
+
+ return 0;
+err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
-
- return -1;
+
+ return -1;
}
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
conf = this->private;
gettimeofday (&tv, NULL);
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
- loc_copy (&statfs_local->loc, loc);
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
- }
+ if (tv.tv_sec > (conf->refresh_interval
+ + conf->last_stat_fetch.tv_sec)) {
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ statfs_frame = copy_frame (frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init (statfs_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs,
+ &tmp_loc, NULL);
+ }
+
+ conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ }
+ return 0;
err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ return -1;
}
-int
+gf_boolean_t
dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int subvol_filled = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
- conf = this->private;
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes <
+ conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_percent));
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_inodes));
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
- /* Check for values above specified percent or free disk */
LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- }
- }
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
+ }
+
+ }
+ UNLOCK (&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
+ }
+
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
+}
+
+static inline
+int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
}
}
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled) {
- if (!(conf->du_stats[i].log++ % GF_UNIVERSAL_ANSWER)) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
}
}
- return subvol_filled;
+ return avail_subvol;
}
+
+/* Get subvol which has atleast one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
int i = 0;
- double max= 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
conf = this->private;
- avail_subvol = subvol;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent > max) {
- max = conf->du_stats[i].avail_percent;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if (conf->du_stats[i].avail_space > max) {
- max = conf->du_stats[i].avail_space;
- avail_subvol = conf->subvolumes[i];
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
}
- }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
}
- UNLOCK (&conf->subvolume_lock);
-
- if (max < conf->min_free_disk)
- avail_subvol = subvol;
- if (avail_subvol == subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space to create");
- }
-
return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 31cb828a4..656cf23a0 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -29,58 +20,92 @@
#include "hashfn.h"
-typedef enum {
- DHT_HASH_TYPE_DM,
-} dht_hashfn_type_t;
-
-
int
dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
-
- switch (type) {
- case DHT_HASH_TYPE_DM:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
- default:
- ret = -1;
- break;
- }
-
- if (ret == 0) {
- *hash_p = hash;
- }
-
- return ret;
+ int ret = 0;
+ uint32_t hash = 0;
+
+ switch (type) {
+ case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
+ hash = gf_dm_hashfn (name, strlen (name));
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ if (ret == 0) {
+ *hash_p = hash;
+ }
+
+ return ret;
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
-
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
-
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
-
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ /*
+ * It wouldn't be safe to use alloca in an inline function that doesn't
+ * actually get inlined, and it wouldn't be efficient to do a real
+ * allocation, so we use alloca here (if needed) and pass that to the
+ * inline.
+ */
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_log (this->name, GF_LOG_TRACE, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
+
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
+
+ return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 10fc1ccda..311a48112 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,177 +18,406 @@
#include "xlator.h"
#include "dht-common.h"
+static inline int
+dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+{
+ uint64_t tmp_subvol = 0;
+
+ tmp_subvol = (long)subvol;
+ return inode_ctx_set1 (inode, this, &tmp_subvol);
+}
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+{
+ int ret = -1;
+ uint64_t tmp_subvol = 0;
+
+ ret = inode_ctx_get1 (inode, this, &tmp_subvol);
+ if (tmp_subvol && subvol)
+ *subvol = (xlator_t *)tmp_subvol;
+
+ return ret;
+}
+
int
dht_frame_return (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+
+ if (!frame)
+ return -1;
+
+ local = frame->local;
- if (!frame)
- return -1;
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK (&frame->lock);
+
+ return this_call_cnt;
+}
- local = frame->local;
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
+static uint64_t
+dht_bits_for (uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
}
- UNLOCK (&frame->lock);
- return this_call_cnt;
+ return bits;
}
+/*
+ * A slightly "updated" version of the algorithm described in the commit log
+ * is used here.
+ *
+ * The only enhancement is that:
+ *
+ * - The number of bits used by the backend filesystem for HUGE d_off which
+ * is described as 63, and
+ * - The number of bits used by the d_off presented by the transformation
+ * upwards which is described as 64, are both made "configurable."
+ */
+
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
int
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
+ }
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ max = conf->subvolume_cnt;
+ cnt = dht_subvol_cnt (this, subvol);
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
+ if (max == 1) {
+ y = x;
goto out;
}
- conf = this->private;
+ max_bits = dht_bits_for (max);
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
- y = ((x * max) + cnt);
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
+ }
out:
- if (y_p)
- *y_p = y;
+ if (y_p)
+ *y_p = y;
- return 0;
+ return 0;
}
+int
+dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
+{
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {0,};
+ int ret = 0; /* not found */
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
+ goto out;
+
+ trav = this->children;
+ while (trav) {
+ snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
+ if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
+ new_name = GF_CALLOC(strlen (loc->name),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_name)
+ goto out;
+ if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
+ new_path = GF_CALLOC(strlen (loc->path),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ strncpy (new_path, loc->path, (strlen (loc->path) -
+ strlen (key) + 1));
+ }
+ strncpy (new_name, loc->name, (strlen (loc->name) -
+ strlen (key) + 1));
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path:
+ gf_strdup (loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref (loc->inode);
+ new_loc->parent = inode_ref (loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ if (!ret) {
+ /* !success */
+ GF_FREE (new_path);
+ GF_FREE (new_name);
+ }
+ return ret;
+}
int
dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+ uint64_t *x_p)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ xlator_t *subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
+ max = conf->subvolume_cnt;
+
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = dht_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
- conf = this->private;
- max = conf->subvolume_cnt;
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
- cnt = y % max;
- x = y / max;
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
- subvol = conf->subvolumes[cnt];
+out:
+ subvol = conf->subvolumes[cnt];
- if (subvol_p)
- *subvol_p = subvol;
+ if (subvol_p)
+ *subvol_p = subvol;
- if (x_p)
- *x_p = x;
+ if (x_p)
+ *x_p = x;
- return 0;
+ return 0;
}
void
-dht_local_wipe (dht_local_t *local)
+dht_local_wipe (xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ if (!local)
+ return;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
- if (local->xattr)
- dict_unref (local->xattr);
+ if (local->xattr)
+ dict_unref (local->xattr);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->layout)
- FREE (local->layout);
+ if (local->layout) {
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ }
- loc_wipe (&local->linkfile.loc);
+ loc_wipe (&local->linkfile.loc);
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ if (local->linkfile.xattr)
+ dict_unref (local->linkfile.xattr);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->linkfile.inode)
+ inode_unref (local->linkfile.inode);
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->params) {
+ dict_unref (local->params);
+ local->params = NULL;
+ }
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ if (local->selfheal.layout) {
+ dht_layout_unref (this, local->selfheal.layout);
+ local->selfheal.layout = NULL;
+ }
+
+ GF_FREE (local->newpath);
+
+ GF_FREE (local->key);
- FREE (local);
+ GF_FREE (local->rebalance.vector);
+
+ if (local->rebalance.iobref)
+ iobref_unref (local->rebalance.iobref);
+
+ mem_put (local);
}
dht_local_t *
-dht_local_init (call_frame_t *frame)
+dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
+ goto out;
- /* TODO: use mem-pool */
- local = CALLOC (1, sizeof (*local));
+ if (loc) {
+ ret = loc_copy (&local->loc, loc);
+ if (ret)
+ goto out;
- if (!local)
- return NULL;
+ inode = loc->inode;
+ }
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+ if (fd) {
+ local->fd = fd_ref (fd);
+ if (!inode)
+ inode = fd->inode;
+ }
- frame->local = local;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
- return local;
-}
+ if (inode) {
+ local->layout = dht_layout_get (frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached (frame->this,
+ inode);
+ }
+ frame->local = local;
-char *
-basestr (const char *str)
+out:
+ if (ret) {
+ if (local)
+ mem_put (local);
+ local = NULL;
+ }
+ return local;
+}
+
+xlator_t *
+dht_first_up_subvol (xlator_t *this)
{
- char *basestr = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
- return basestr;
+out:
+ return child;
}
xlator_t *
-dht_first_up_subvol (xlator_t *this)
+dht_last_up_subvol (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
-
- conf = this->private;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- return child;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = conf->subvolume_cnt-1; i >= 0; i--) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+out:
+ return child;
}
xlator_t *
@@ -206,17 +426,23 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
- if (is_fs_root (loc)) {
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%"PRId64,
- loc->path, loc->parent->ino);
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
goto out;
}
@@ -230,6 +456,10 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
}
out:
+ if (layout) {
+ dht_layout_unref (this, layout);
+ }
+
return subvol;
}
@@ -240,6 +470,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -247,9 +479,13 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
goto out;
}
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
+ if (layout) {
+ dht_layout_unref (this, layout);
+ }
+
return subvol;
}
@@ -257,70 +493,655 @@ out:
xlator_t *
dht_subvol_next (xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
-
- conf = this->private;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
- }
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
- return next;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
+ }
+ }
+
+out:
+ return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+out:
+ return next;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ conf = this->private;
+ if (!conf)
+ goto out;
- conf = this->private;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
+ }
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
- }
+out:
+ return ret;
+}
+
+
+#define set_if_greater(a, b) do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
+
+
+#define set_if_greater_time(a, an, b, bn) do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0) \
+
+
+int
+dht_iatt_merge (xlator_t *this, struct iatt *to,
+ struct iatt *from, xlator_t *subvol)
+{
+ if (!from || !to)
+ return 0;
+
+ to->ia_dev = from->ia_dev;
+
+ uuid_copy (to->ia_gfid, from->ia_gfid);
+
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
+
+ set_if_greater (to->ia_uid, from->ia_uid);
+ set_if_greater (to->ia_gid, from->ia_gid);
+
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
+
+ return 0;
+}
+
+int
+dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+ if (!child) {
+ goto err;
+ }
+
+ if (strcmp (parent->path, "/") == 0)
+ gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+
+ if (!child->path) {
+ goto err;
+ }
+
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+
+ if (!child->inode) {
+ goto err;
+ }
+
+ return 0;
+err:
+ loc_wipe (child);
+ return -1;
+}
+
+
+
+int
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+{
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
+
+ if (!conf)
+ return -1;
+
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
+
+ conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
+
+ conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
+ gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
+
+ conf->last_event = GF_CALLOC (cnt, sizeof (int),
+ gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
+
+ conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
+
+ conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+
+ conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+
+static int
+dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->rebalance.target_op_fn (THIS, frame, op_ret);
+
+ return 0;
+}
+
+
+int
+dht_migration_complete_check_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ struct iatt stbuf = {0,};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {0,};
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ uint64_t tmp_subvol = 0;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ } else {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ }
+
+ if (!ret)
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+
+ if (ret) {
+ if ((errno != ENOENT) || (!local->loc.inode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+ /* Need to do lookup on hashed subvol, then get the file */
+ ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
+ NULL);
+ if (ret)
+ goto out;
+ dst_node = dht_subvol_get_cached (this, local->loc.inode);
+ }
+
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the destination node",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ /* lookup on dst */
+ if (local->loc.inode) {
+ ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* update inode ctx (the layout) */
+ dht_layout_unref (this, local->layout);
+
+ ret = dht_layout_preset (this, dst_node, inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: could not set preset layout for subvol %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+
+ layout = dht_layout_for_subvol (this, dst_node);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, dst_node ? dst_node->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_layout_set (this, inode, layout);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set the new layout",
+ local->loc.path);
+ goto out;
+ }
+
+ local->cached_subvol = dst_node;
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
+ if (tmp_subvol)
+ goto out;
+
+ if (list_empty (&inode->fd_list))
+ goto out;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* perform 'open()' on all the fd's present on the inode */
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done,
+ frame, frame);
+ return ret;
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+static int
+dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = sync_frame->local;
+
+ local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+
+ return 0;
+}
+
+static int
+dht_rebalance_inprogress_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {0,};
+ loc_t tmp_loc = {0,};
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr from dict",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ /* lookup on dst */
+ ret = syncop_lookup (dst_node, &local->loc, NULL,
+ &stbuf, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ if (list_empty (&inode->fd_list))
+ goto done;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID (0, 0);
+
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to send open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+done:
+ ret = dht_inode_ctx_set1 (this, inode, dst_node);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set inode-ctx target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
- return ret;
+ ret = 0;
+out:
+ return ret;
}
+int
+dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+{
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done,
+ frame, frame);
+ return ret;
+}
int
-dht_stat_merge (xlator_t *this, struct stat *to,
- struct stat *from, xlator_t *subvol)
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
- to->st_dev = from->st_dev;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
- dht_itransform (this, subvol, from->st_ino, &to->st_ino);
+ return ret;
+}
+
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, stat, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
- to->st_mode = from->st_mode;
- to->st_nlink = from->st_nlink;
- to->st_uid = from->st_uid;
- to->st_gid = from->st_gid;
- to->st_rdev = from->st_rdev;
- to->st_size += from->st_size;
- to->st_blksize = from->st_blksize;
- to->st_blocks += from->st_blocks;
+ time = &ctx->time;
- set_if_greater (to->st_atime, from->st_atime);
- set_if_greater (to->st_mtime, from->st_mtime);
- set_if_greater (to->st_ctime, from->st_ctime);
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+ stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+ stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+ stat->ia_atime, stat->ia_atime_nsec, inode, post);
- return 0;
+ ret = dht_inode_ctx_set (inode, this, ctx);
+out:
+ return 0;
+}
+
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = inode_ctx_get (inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *) ctx_int;
+out:
+ return ret;
+}
+
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set (inode, this, &ctx_int);
+out:
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
new file mode 100644
index 000000000..ece84151a
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -0,0 +1,1139 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
+
+int
+dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+
+ return 0;
+}
+
+int
+dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = ENOENT;
+ if (op_ret)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ NULL);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ local->rebalance.target_op_fn = dht_attr2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_attr2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ subvol = local->cached_subvol;
+ local->call_cnt = 2;
+
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, local->fd, NULL);
+ } else {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, &local->loc, NULL);
+ }
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf, xdata);
+ }
+err:
+ return 0;
+}
+
+int
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, loc, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, fd, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if ((op_ret == -1) && (op_errno != ENOENT))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ local->rebalance.target_op_fn = dht_readv2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int
+dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off, flags, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev || !prev->this)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) && (op_errno == ENOTCONN) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
+
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check (frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ loc, mask, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+
+ local->op_errno = op_errno;
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ /* If context is set, then send flush() it to the destination */
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
+ dht_flush2 (this, frame, 0);
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, local->fd, NULL);
+
+ return 0;
+}
+
+
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if (op_ret == -1 && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->op_errno = op_errno;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ local->rebalance.target_op_fn = dht_fsync2;
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ }
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ }
+ if (!ret)
+ return 0;
+ } else {
+ dht_fsync2 (this, frame, 0);
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, NULL);
+
+ return 0;
+}
+
+int
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ fd, datasync, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+
+ return 0;
+}
+
+
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+ cmd, flock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/* Symlinks are currently not migrated, so no need for any check here */
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+err:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+ return 0;
+}
+
+
+int
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readlink_cbk,
+ subvol, subvol->fops->readlink,
+ loc, size, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
+
+int
+dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_xattrop_cbk,
+ subvol, subvol->fops->xattrop,
+ loc, flags, dict, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+dht_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame,
+ dht_fxattrop_cbk,
+ subvol, subvol->fops->fxattrop,
+ fd, flags, dict, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_inodelk_cbk,
+ subvol, subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
new file mode 100644
index 000000000..4b3f3a049
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -0,0 +1,1013 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
+
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ if (op_ret == -1 && (op_errno != ENOENT)) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_writev2;
+
+ local->op_errno = op_errno;
+ /* Phase 2 of migration */
+ if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_writev2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ local->fd, local->rebalance.vector, local->rebalance.count,
+ local->rebalance.offset, local->rebalance.flags,
+ local->rebalance.iobref, NULL);
+
+ return 0;
+}
+
+int
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ local->rebalance.vector = iov_dup (vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.flags = flags;
+ local->rebalance.iobref = iobref_ref (iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int
+dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_truncate2;
+
+ local->op_errno = op_errno;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
+ dht_truncate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+
+int
+dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ inode = local->fd ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset, NULL);
+ } else {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset, NULL);
+ }
+
+ return 0;
+}
+
+int
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->truncate,
+ loc, offset, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->ftruncate,
+ fd, offset, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_fallocate2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_fallocate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
+ local->fd, local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, NULL);
+
+ return 0;
+}
+
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_fallocate_cbk,
+ subvol, subvol->fops->fallocate,
+ fd, mode, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_discard2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_discard2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_zerofill2;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_zerofill2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+/* handle cases of migration here for 'setattr()' calls */
+int
+dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_setattr2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
+ } else {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
+ }
+
+ return 0;
+}
+
+
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
+ dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+
+ return 0;
+}
+
+
+int
+dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr,
+ loc, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index f2e3de14b..38e9970a7 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -38,583 +29,763 @@
dht_layout_t *
dht_layout_new (xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
+ dht_layout_t *layout = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
- layout = CALLOC (1, layout_size (cnt));
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ layout = GF_CALLOC (1, layout_size (cnt),
+ gf_dht_mt_dht_layout_t);
+ if (!layout) {
+ goto out;
+ }
- layout->cnt = cnt;
- if (conf)
+ layout->type = DHT_HASH_TYPE_DM;
+ layout->cnt = cnt;
+
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
layout->gen = conf->gen;
+ }
+
+ layout->ref = 1;
out:
- return layout;
+ return layout;
}
dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
- uint64_t layout = 0;
- int ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
- ret = inode_ctx_get (inode, this, &layout);
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK (&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_get (inode, this, &layout);
+ if (layout) {
+ layout->ref++;
+ }
+ }
+ UNLOCK (&conf->layout_lock);
- return (dht_layout_t *)(long)layout;
+out:
+ return layout;
}
-xlator_t *
-dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+int
+dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
- uint32_t hash = 0;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
+ dht_conf_t *conf = NULL;
+ int oldret = -1;
+ int ret = 0;
+ dht_layout_t *old_layout;
+ conf = this->private;
+ if (!conf)
+ goto out;
- ret = dht_hash_compute (layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
+ LOCK (&conf->layout_lock);
+ {
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
+ layout->ref++;
+ dht_inode_ctx_layout_set (inode, this, layout);
+ }
+ UNLOCK (&conf->layout_lock);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume for hash (value) = %u", hash);
- }
+ if (!oldret) {
+ dht_layout_unref (this, old_layout);
+ }
out:
- return subvol;
+ return ret;
}
-dht_layout_t *
-dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+void
+dht_layout_unref (xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ int ref = 0;
+ if (!layout || layout->preset || !this->private)
+ return;
- conf = this->private;
+ conf = this->private;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
- }
+ LOCK (&conf->layout_lock);
+ {
+ ref = --layout->ref;
+ }
+ UNLOCK (&conf->layout_lock);
- return layout;
+ if (!ref)
+ GF_FREE (layout);
}
-int
-dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+dht_layout_t *
+dht_layout_ref (xlator_t *this, dht_layout_t *layout)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
-
- conf->file_layouts = CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *));
- if (!conf->file_layouts) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ dht_conf_t *conf = NULL;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
+ if (layout->preset || !this->private)
+ return layout;
- if (!layout) {
- goto out;
- }
+ conf = this->private;
+ LOCK (&conf->layout_lock);
+ {
+ layout->ref++;
+ }
+ UNLOCK (&conf->layout_lock);
- layout->preset = 1;
+ return layout;
+}
- layout->list[0].xlator = conf->subvolumes[i];
- conf->file_layouts[i] = layout;
- }
+xlator_t *
+dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+{
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+
+ ret = dht_hash_compute (this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "hash computation failed for type=%d name=%s",
+ layout->type, name);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash
+ && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "no subvolume for hash (value) = %u", hash);
+ }
- ret = 0;
out:
- return ret;
+ return subvol;
}
-int
-dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+dht_layout_t *
+dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
-
- disk_layout = CALLOC (5, sizeof (int));
- if (!disk_layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ conf = this->private;
+ if (!conf)
+ goto out;
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- ret = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
+ }
+ }
out:
- return ret;
+ return layout;
}
int
-dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t *disk_layout)
+dht_layouts_init (xlator_t *this, dht_conf_t *conf)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
- /* TODO: assert disk_layout_ptr is of required length */
+ if (!conf)
+ goto out;
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "disk layout has invalid count %d", cnt);
- return -1;
- }
+ conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
+ sizeof (dht_layout_t *),
+ gf_dht_mt_dht_layout_t);
+ if (!conf->file_layouts) {
+ goto out;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new (this, 1);
+
+ if (!layout) {
+ goto out;
+ }
- /* TODO: assert type is compatible */
- type = ntoh32 (disk_layout[1]);
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
+ layout->preset = 1;
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
+ layout->list[0].xlator = conf->subvolumes[i];
- gf_log (this->name, GF_LOG_TRACE,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
+ conf->file_layouts[i] = layout;
+ }
- return 0;
+ ret = 0;
+out:
+ return ret;
}
int
-dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
+ disk_layout = GF_CALLOC (5, sizeof (int),
+ gf_dht_mt_int32_t);
+ if (!disk_layout) {
+ goto out;
+ }
- if (op_ret != 0) {
- err = op_errno;
- }
+ disk_layout[0] = hton32 (1);
+ disk_layout[1] = hton32 (layout->type);
+ disk_layout[2] = hton32 (layout->list[pos].start);
+ disk_layout[3] = hton32 (layout->list[pos].stop);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ ret = 0;
+
+out:
+ return ret;
+}
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- VOID(&disk_layout));
- }
- if (ret != 0) {
- layout->list[i].err = -1;
- gf_log (this->name, GF_LOG_TRACE,
- "missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
+int
+dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len)
+{
+ int cnt = 0;
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int disk_layout[4];
+
+ if (!disk_layout_raw) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "error no layout on disk for merge");
+ return -1;
}
- ret = dht_disk_layout_merge (this, layout, i, disk_layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
+ GF_ASSERT (disk_layout_len == sizeof (disk_layout));
+
+ memcpy (disk_layout, disk_layout_raw, disk_layout_len);
+
+ cnt = ntoh32 (disk_layout[0]);
+ if (cnt != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "disk layout has invalid count %d", cnt);
+ return -1;
+ }
+
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Catastrophic error layout with unknown type found %d",
+ disk_layout[1]);
+ return -1;
}
- layout->list[i].err = 0;
+
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "merged to layout: %u - %u (type %d) from %s",
+ start_off, stop_off, type,
+ layout->list[pos].xlator->name);
+
+ return 0;
+}
+
+
+int
+dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
+
+ if (op_ret != 0) {
+ err = op_errno;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
+ }
+ }
+
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
+ &disk_layout_raw, &disk_layout_len);
+ }
+
+ if (ret != 0) {
+ layout->list[i].err = 0;
+ gf_log (this->name, GF_LOG_TRACE,
+ "missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "layout merge from subvolume %s failed",
+ subvol->name);
+ goto out;
+ }
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
void
dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+}
+
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
}
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ return (strcmp (layout->list[i].xlator->name,
+ layout->list[j].xlator->name));
+}
+
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, xlator->name))
+ return _gf_true;
+ }
+ return _gf_false;
}
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
-
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ int64_t diff = 0;
+
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
+ if (layout->list[i].err || layout->list[j].err)
+ diff = layout->list[i].err - layout->list[j].err;
+ else
+ diff = (int64_t) layout->list[i].start
+ - (int64_t) layout->list[j].start;
- return diff;
+out:
+ return diff;
}
int
dht_layout_sort (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- return 0;
+ /* TODO: O(n^2) -- bad bad */
+
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
}
int
dht_layout_sort_volname (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
+
+ /* TODO: O(n^2) -- bad bad */
- return 0;
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
}
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- dht_conf_t *conf = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
-
-
- conf = this->private;
-
- /* TODO: explain WTF is happening */
-
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
- }
- continue;
- }
-
- is_virgin = 0;
-
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
- holes += (layout->list[i].start - (prev_stop + 1));
- }
-
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
- }
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ int ret = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* TODO: explain what is happening */
+
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
+ }
+ break;
+ default:
+ misc++;
+ continue;
+ }
+
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ }
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
- holes += (last_stop - prev_stop);
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
+
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
+
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (holes_p)
- *holes_p = hole_cnt;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (missing_p)
+ *missing_p = missing;
- if (missing_p)
- *missing_p = missing;
+ if (down_p)
+ *down_p = down;
- if (down_p)
- *down_p = down;
+ if (misc_p)
+ *misc_p = misc;
- if (misc_p)
- *misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
- return ret;
+ return ret;
}
int
dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- int i = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
-
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "sort failed?! how the ....");
- goto out;
- }
-
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error while finding anomalies in %s -- not good news",
- loc->path);
- goto out;
- }
-
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_log (this->name, GF_LOG_DEBUG,
- "directory %s looked up first time",
- loc->path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "found anomalies in %s. holes=%d overlaps=%d",
- loc->path, holes, overlaps);
- }
- ret = 1;
- }
+ int ret = 0;
+ int i = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+
+ ret = dht_layout_sort (layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "sort failed?! how the ....");
+ goto out;
+ }
+
+ ret = dht_layout_anomalies (this, loc, layout,
+ &holes, &overlaps,
+ &missing, &down, &misc, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "error while finding anomalies in %s -- not good news",
+ loc->path);
+ goto out;
+ }
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "directory %s looked up first time",
+ loc->path);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "found anomalies in %s. holes=%d overlaps=%d",
+ loc->path, holes, overlaps);
+ }
+ ret = -1;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (layout->list[i].err > 0) {
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "path=%s err=%s on subvol=%s",
+ loc->path,
+ strerror (layout->list[i].err),
+ (layout->list[i].xlator ?
+ layout->list[i].xlator->name
+ : "<>"));
+ if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
+ ret++;
+ }
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
- */
- if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
- }
- }
out:
- return ret;
+ return ret;
}
+int
+dht_dir_has_layout (dict_t *xattr, char *name)
+{
+
+ void *disk_layout_raw = NULL;
+
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
int
dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+ loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t *disk_layout = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
-
-
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
-
- if (pos == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ dht_conf_t *conf = this->private;
+
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s - no layout info for subvolume %s",
+ loc->path, subvol->name);
+ ret = 1;
+ goto out;
+ }
err = layout->list[pos].err;
- if (!xattr) {
+ if (!xattr) {
if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"%s - xattr dictionary is NULL",
loc->path);
ret = -1;
}
- goto out;
- }
+ goto out;
+ }
+
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
+ &disk_layout_raw);
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- VOID(&disk_layout));
-
- if (dict_ret < 0) {
+ if (dict_ret < 0) {
if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"%s - disk layout missing", loc->path);
ret = -1;
}
- goto out;
- }
-
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - disk layout has invalid count %d",
- loc->path, count);
- ret = -1;
- goto out;
- }
-
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvol: %s; inode layout - %"PRId32" - %"PRId32"; "
- "disk layout - %"PRId32" - %"PRId32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ goto out;
+ }
+
+ memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+
+ count = ntoh32 (disk_layout[0]);
+ if (count != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s - disk layout has invalid count %d",
+ loc->path, count);
+ ret = -1;
+ goto out;
+ }
+
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ if ((layout->list[pos].start != start_off)
+ || (layout->list[pos].stop != stop_off)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
+ "disk layout - %"PRIu32" - %"PRIu32,
+ layout->list[pos].xlator->name,
+ layout->list[pos].start, layout->list[pos].stop,
+ start_off, stop_off);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
out:
- return ret;
+ return ret;
}
int
-dht_layout_inode_set (xlator_t *this, xlator_t *subvol, inode_t *inode)
+dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
{
dht_layout_t *layout = NULL;
- int ret = -1;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ layout = dht_layout_for_subvol (this, subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no pre-set layout for subvolume %s",
+ subvol ? subvol->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ LOCK (&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_set (inode, this, layout);
+ }
+ UNLOCK (&conf->layout_lock);
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
-
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 213d3f2bf..dbc9d0b3c 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,197 +19,310 @@
#include "compat.h"
#include "dht-common.h"
-
-
int
-dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- local->linkfile.inode,
- &local->linkfile.stbuf);
-
- return 0;
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_log (this->name, GF_LOG_WARNING, "got non-linkfile %s:%s",
+ prev->this->name, local->loc.path);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
}
-
+#define is_equal(a, b) (a == b)
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dict_t *xattr = NULL;
- data_t *str_data = NULL;
- int ret = -1;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1)
- goto err;
-
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->linkfile.xattr = dict_ref (xattr);
- local->linkfile.inode = inode_ref (inode);
-
- str_data = str_to_data (local->linkfile.srcvol->name);
- if (!str_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to initialize linkfile data");
- op_errno = EINVAL;
- }
- str_data = NULL;
-
- local->linkfile.stbuf = *stbuf;
-
- STACK_WIND (frame, dht_linkfile_xattr_cbk,
- prev->this, prev->this->fops->setxattr,
- &local->linkfile.loc, local->linkfile.xattr, 0);
-
- return 0;
-
-err:
- if (str_data) {
- data_destroy (str_data);
- str_data = NULL;
- }
-
- local->linkfile.linkfile_cbk (frame, cookie, this,
- op_ret, op_errno, inode, stbuf);
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set linkto key");
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+ xlator_t *this,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
- loc_copy (&local->linkfile.loc, loc);
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+ if (!uuid_is_null (local->gfid)) {
+ ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: gfid set failed", loc->path);
+ }
+
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: internal-fop set failed", loc->path);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "%s: failed to initialize linkfile data",
+ loc->path);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_linkfile_create_cbk,
+ fromvol, fromvol->fops->mknod, loc,
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
+
+ return 0;
+out:
+ local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0);
+ if (need_unref && dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
+ local = frame->local;
+ prev = cookie;
+ subvol = prev->this;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking linkfile %s on %s failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlinking linkfile %s on %s failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+ }
- DHT_STACK_DESTROY (frame);
+ DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+ xlator_t *subvol, loc_t *loc)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
-
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- unlink_local = dht_local_init (unlink_frame);
- if (!unlink_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_copy (&unlink_local->loc, loc);
-
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc);
-
- return 0;
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
+
+ unlink_frame = copy_frame (frame);
+ if (!unlink_frame) {
+ goto err;
+ }
+
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init (unlink_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
+
+ STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
+ subvol, subvol->fops->unlink,
+ &unlink_local->loc, 0, NULL);
+
+ return 0;
err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if (unlink_frame)
+ DHT_STACK_DESTROY (unlink_frame);
- return -1;
+ return -1;
}
xlator_t *
-dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf,
- dict_t *xattr)
+dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
-
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
- conf = this->private;
+ conf = this->private;
- if (!xattr)
- goto out;
+ if (!xattr)
+ goto out;
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
- if ((-1 == ret) || !volname)
- goto out;
+ if ((-1 == ret) || !volname)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
out:
- return subvol;
+ return subvol;
+}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_log (this->name, GF_LOG_ERROR, "setattr of uid/gid on %s"
+ " :<gfid:%s> failed (%s)",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid), strerror(op_errno));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
}
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
new file mode 100644
index 000000000..e893eb48f
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __DHT_MEM_TYPES_H__
+#define __DHT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_dht_mem_types_ {
+ gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
+ gf_dht_mt_dht_conf_t,
+ gf_dht_mt_char,
+ gf_dht_mt_int32_t,
+ gf_dht_mt_xlator_t,
+ gf_dht_mt_dht_layout_t,
+ gf_switch_mt_dht_conf_t,
+ gf_switch_mt_dht_du_t,
+ gf_switch_mt_switch_sched_array,
+ gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
+ gf_dht_mt_end
+};
+#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
new file mode 100644
index 000000000..bcb19f23e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -0,0 +1,1815 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+#include "xlator.h"
+#include <fnmatch.h>
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+
+static int
+dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
+ int32_t size, off_t offset, struct iobref *iobref)
+{
+ int i = 0;
+ int ret = -1;
+ int start_idx = 0;
+ int tmp_offset = 0;
+ int write_needed = 0;
+ int buf_len = 0;
+ int size_pending = 0;
+ char *buf = NULL;
+
+ /* loop through each vector */
+ for (i = 0; i < count; i++) {
+ buf = vec[i].iov_base;
+ buf_len = vec[i].iov_len;
+
+ for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
+ start_idx += GF_DISK_SECTOR_SIZE) {
+
+ if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
+ write_needed = 1;
+ continue;
+ }
+
+ if (write_needed) {
+ ret = syncop_write (to, fd, (buf + tmp_offset),
+ (start_idx - tmp_offset),
+ (offset + tmp_offset),
+ iobref, 0);
+ /* 'path' will be logged in calling function */
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ write_needed = 0;
+ }
+ tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
+ }
+
+ if ((start_idx < buf_len) || write_needed) {
+ /* This means, last chunk is not yet written.. write it */
+ ret = syncop_write (to, fd, (buf + tmp_offset),
+ (buf_len - tmp_offset),
+ (offset + tmp_offset), iobref, 0);
+ if (ret < 0) {
+ /* 'path' will be logged in calling function */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
+ goto out;
+ }
+ }
+
+ size_pending = (size - buf_len);
+ if (!size_pending)
+ break;
+ }
+
+ ret = size;
+out:
+ return ret;
+
+}
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (errno));
+ goto out;
+ }
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (errno));
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static inline int
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
+{
+ int ret = -1;
+
+ if (IA_ISDIR (stbuf->ia_type)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: migrate-file called on directory", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ goto out;
+ }
+ if (stbuf->ia_nlink > 1) {
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ }
+ ret = ENOTSUP;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static inline int
+__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
+ dict_t *dict, fd_t **dst_fd, dict_t *xattr)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ dht_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+
+ ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_str (dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: fd create failed (destination) (%s)",
+ loc->path, strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "file %s exits in %s with different gfid",
+ loc->path, to->name);
+ fd_unref (fd);
+ goto out;
+ }
+ }
+ if ((ret == -1) && (errno != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "%s: failed to lookup file (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, it will just open the file */
+ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
+ dict, &new_stbuf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (to, fd, xattr, 0);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ if (dst_fd)
+ *dst_fd = fd;
+
+ /* success */
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static inline int
+__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, int flag)
+{
+ struct statvfs src_statfs = {0,};
+ struct statvfs dst_statfs = {0,};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+
+ this = THIS;
+
+ ret = syncop_statfs (from, loc, &src_statfs);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_statfs (to, loc, &dst_statfs);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid errorneous move to destination where
+ the space could be scantily available.
+ */
+ if (stbuf) {
+ dst_statfs_blocks = ((dst_statfs.f_bavail *
+ dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ src_statfs_blocks = ((src_statfs.f_bavail *
+ src_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ if ((dst_statfs_blocks - stbuf->ia_blocks) <
+ (src_statfs_blocks + stbuf->ia_blocks)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "data movement attempted from node (%s) with"
+ " higher disk space to a node (%s) with "
+ "lesser disk space (%s)", from->name,
+ to->name, loc->path);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ ret = 1;
+ goto out;
+ }
+ }
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
+ ret = 1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline int
+__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists)
+{
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
+ DHT_REBALANCE_BLKSIZE : (ia_size - total));
+ ret = syncop_readv (from, src, read_size,
+ offset, 0, &vector, &count, &iobref);
+ if (!ret || (ret < 0)) {
+ break;
+ }
+
+ if (hole_exists)
+ ret = dht_write_with_holes (to, dst, vector, count,
+ ret, offset, iobref);
+ else
+ ret = syncop_writev (to, dst, vector, count,
+ offset, iobref, 0);
+ if (ret < 0) {
+ break;
+ }
+ offset += ret;
+ total += ret;
+
+ GF_FREE (vector);
+ if (iobref)
+ iobref_unref (iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref (iobref);
+ GF_FREE (vector);
+
+ if (ret >= 0)
+ ret = 0;
+
+ return ret;
+}
+
+
+static inline int
+__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *stbuf, fd_t **src_fd)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ struct iatt iatt = {0,};
+ dht_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+
+ fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: fd create failed (source)", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open (from, loc, O_RDWR, fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to open file %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ ret = -1;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, conf->link_xattr_name, to->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)",
+ loc->path, to->name);
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr (from, loc, dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr on %s in %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set mode on %s in %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ if (src_fd)
+ *src_fd = fd;
+
+ /* success */
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+int
+migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf)
+{
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {0,};
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
+ if ((ret == -1) && (errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file exists in destination", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink (to, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to delete the linkfile (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+ }
+
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK (buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink (from, loc, &link, buf->ia_size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: readlink on symlink failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_symlink (to, loc, link, dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: creating symlink failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ goto done;
+ }
+
+ ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
+ buf->ia_type),
+ makedev (ia_major (buf->ia_rdev),
+ ia_minor (buf->ia_rdev)), dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+done:
+ ret = syncop_setattr (to, loc, buf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ ret = syncop_unlink (from, loc);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
+ loc->path, strerror (errno));
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+/*
+ return values:
+
+ -1 : failure
+ 0 : successfully migrated data
+ 1 : not a failure, but we can't migrate data as of now
+*/
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag)
+{
+ int ret = -1;
+ struct iatt new_stbuf = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
+ ia_prot_t src_ia_prot = {0,};
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+
+ gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
+ if (ret)
+ goto out;
+
+ /* Take care of the special files */
+ if (!IA_ISREG (stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files (this, from, to, loc, &stbuf);
+ goto out;
+ }
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (errno));
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
+ dict, &dst_fd, xattr);
+ if (ret)
+ goto out;
+
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+
+ ret = syncop_fstat (from, src_fd, &stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+ /* All I/O happens in this function */
+ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
+ loc->path);
+ /* reset the destination back to 0 */
+ ret = syncop_ftruncate (to, dst_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to reset target size back to 0 (%s)",
+ loc->path, strerror (errno));
+ }
+
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ ret = syncop_fsync (to, dst_fd, 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat (from, src_fd, &new_stbuf);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to fstat file %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr (to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr (from, src_fd, &empty_iatt,
+ GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, \
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ /* Do a stat and check the gfid before unlink */
+ ret = syncop_stat (from, loc, &empty_iatt);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to do a stat on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
+ /* take out the source from namespace */
+ ret = syncop_unlink (from, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform unlink on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to lookup the file on subvolumes (%s)",
+ loc->path, strerror (errno));
+ }
+
+ gf_log (this->name, GF_LOG_INFO,
+ "completed migration of %s from subvolume %s to %s",
+ loc->path, from->name, to->name);
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (xattr)
+ dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ if (dst_fd)
+ syncop_close (dst_fd);
+ if (src_fd)
+ syncop_close (src_fd);
+
+ return ret;
+}
+
+static int
+rebalance_task (void *data)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+
+ frame = data;
+
+ local = frame->local;
+
+ /* This function is 'synchrounous', hence if it returns,
+ we are done with the task */
+ ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags);
+
+ return ret;
+}
+
+static int
+rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ int ret = -1;
+ uint64_t layout_int = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ this = THIS;
+ local = sync_frame->local;
+
+ if (!op_ret) {
+ /* Make sure we have valid 'layout' in inode ctx
+ after the operation */
+ ret = inode_ctx_del (local->loc.inode, this, &layout_int);
+ if (!ret && layout_int) {
+ layout = (dht_layout_t *)(long)layout_int;
+ dht_layout_unref (this, layout);
+ }
+
+ ret = dht_layout_preset (this, local->rebalance.target_node,
+ local->loc.inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set inode ctx", local->loc.path);
+ }
+
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ }
+
+ if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ }
+
+ DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, rebalance_task,
+ rebalance_task_completion,
+ frame, frame);
+ return ret;
+}
+
+int
+gf_listener_stop (xlator_t *this)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((errno != ENOSPC) || (errno != ENOTCONN))
+ return 1;
+
+ if (errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+ out:
+ return ret;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
+ int readdir_operrno = 0;
+ struct timeval dir_start = {0,};
+ struct timeval end = {0,};
+ double elapsed = {0,};
+ struct timeval start = {0,};
+ int32_t err = 0;
+
+ gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+ loc->path);
+ gettimeofday (&dir_start, NULL);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0) {
+
+ if (ret < 0) {
+
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
+ " Aborting migrate-data",
+ strerror(readdir_operrno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdir_operrno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&start, NULL);
+ }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match (defrag, entry->d_name,
+ entry->d_stat.ia_size)
+ == _gf_false)) {
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_NODE_UUID_KEY);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid for %s", entry_loc.path);
+ continue;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+ &uuid_str);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid from dict for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ if (uuid_parse (uuid_str, node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+ "failed for %s", entry_loc.path);
+ continue;
+ }
+
+ /* if file belongs to different node, skip migration
+ * the other node will take responsibility of migration
+ */
+ if (uuid_compare (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_TRACE, "%s does not"
+ "belong to this node", entry_loc.path);
+ continue;
+ }
+
+ uuid_str = NULL;
+
+ dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+
+
+ /* if distribute is present, it will honor this key.
+ * -1 is returned if distribute is not present or file
+ * doesn't have a link-file. If file has link-file, the
+ * path of link-file will be the value, and also that
+ * guarantees that file has to be mostly migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE, "failed to "
+ "get link-to key for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret) {
+ err = op_errno;
+ /* errno is overloaded. See
+ * rebalance_task_completion () */
+ if (err != ENOSPC) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data skipped for %s"
+ " due to space constraints",
+ entry_loc.path);
+ defrag->skipped +=1;
+ } else{
+ gf_log (this->name, GF_LOG_ERROR,
+ "migrate-data failed for %s",
+ entry_loc.path);
+ defrag->total_failures +=1;
+ }
+ }
+
+ if (ret == -1) {
+ op_errno = errno;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+ (end.tv_usec - start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration of "
+ "file:%s size:%"PRIu64" bytes took %.2f"
+ "secs", entry_loc.path, iatt.ia_size,
+ elapsed/1e6);
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+
+ if (readdir_operrno == ENOENT)
+ break;
+ }
+
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+ (end.tv_usec - dir_start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+ "%.2f secs", loc->path, elapsed/1e6);
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+ int readdirp_errno = 0;
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+ if (ret)
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(errno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdirp_errno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ if (readdirp_errno == ENOENT)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ gettimeofday (&defrag->start_time, NULL);
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+
+
+out:
+ LOCK (&defrag->lock);
+ {
+ status = dict_new ();
+ gf_defrag_status_get (defrag, status);
+ if (ctx->notify)
+ ctx->notify (GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref (status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag) {
+ GF_FREE (defrag);
+ conf->defrag = NULL;
+ }
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop (sync_frame->this);
+
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ struct timeval end = {0,};
+
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ gettimeofday (&end, NULL);
+
+ elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+ if (elapsed) {
+ ret = dict_set_double (dict, "run-time", elapsed);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set run-time");
+ }
+
+ ret = dict_set_uint64 (dict, "failures", failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set failure count");
+
+ ret = dict_set_uint64 (dict, "skipped", skipped);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set skipped file count");
+log:
+ switch (defrag->defrag_status) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
+ "secs", status, elapsed);
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+ "%"PRIu64, files, size, lookup, failures, skipped);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Received stop command on rebalance");
+ defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ if (output)
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 6c34dabad..5d6f4f232 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
@@ -33,373 +24,735 @@
int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_log (this->name, GF_LOG_INFO,
+ "rename %s -> %s on %s failed (%s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, strerror (op_errno));
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- } else {
- /* TODO: construct proper stbuf for dir */
- local->stbuf = *stbuf;
- }
+unwind:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent,
+ &local->preparent, &local->postparent, xdata);
+ }
+
+ return 0;
+}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
+int
+dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int i = 0;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_log (this->name, GF_LOG_INFO,
+ "rename %s -> %s on %s failed (%s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, strerror (op_errno));
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND (frame, dht_rename_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+
+ return 0;
+unwind:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent,
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
}
-
int
dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
+ dht_local_t *local = NULL;
- if (local->op_ret == -1)
- goto err;
+ local = frame->local;
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
+ if (local->op_ret == -1)
+ goto err;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
- }
+ local->op_ret = 0;
- return 0;
+ STACK_WIND (frame, dht_rename_hashed_dir_cbk,
+ local->dst_hashed,
+ local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
err:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
- return 0;
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
}
int
dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ if (op_ret > 2) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
}
int
dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto err;
+ }
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ STACK_WIND (frame, dht_rename_readdir_cbk,
+ prev->this, prev->this->fops->readdir,
+ local->fd, 4096, 0, NULL);
- return 0;
+ return 0;
err:
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
}
int
dht_rename_dir (call_frame_t *frame, xlator_t *this)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int op_errno = -1;
- conf = frame->this->private;
- local = frame->local;
+ conf = frame->this->private;
+ local = frame->local;
- local->call_cnt = conf->subvolume_cnt;
+ local->call_cnt = conf->subvolume_cnt;
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_log (this->name, GF_LOG_INFO,
+ "one of the subvolumes down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
- local->op_ret = 0;
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ local->op_ret = 0;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
- }
+ if (!local->dst_cached) {
+ dht_rename_dir_do (frame, this);
+ return 0;
+ }
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rename_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ &local->loc2, local->fd, NULL);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
+#define DHT_MARK_FOP_INTERNAL(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+ " internal dict key for %s", local->loc.path); \
+ } \
+ }while (0)
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+}
int
dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "!local, should not happen");
+ goto out;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: unlink on %s failed (%s)",
+ local->loc.path, prev->this->name, strerror (op_errno));
+ }
+
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_done (frame, this);
+ }
+
+out:
+ return 0;
+}
- local = frame->local;
- prev = cookie;
- this_call_cnt = dht_frame_return (frame);
+int
+dht_rename_cleanup (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (src_cached == dst_cached)
+ goto nolinks;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ if (!call_cnt)
+ goto nolinks;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_hashed, dst_hashed->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (src_cached != dst_hashed) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc2, 0, xattr);
+ }
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+
+nolinks:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+}
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- }
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+int
+dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
- return 0;
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "link/file %s on %s failed (%s)",
+ local->loc.path, prev->this->name, strerror (op_errno));
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
}
int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: rename on %s failed (%s)", local->loc.path,
+ prev->this->name, strerror (op_errno));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame (frame);
+ if (!link_frame) {
+ goto err;
+ }
+
+ /* fop value sent as maxvalue because it is not used
+ anywhere in this case */
+ link_local = dht_local_init (link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto err;
+ }
+
+ if (link_local->loc.inode)
+ inode_unref (link_local->loc.inode);
+ link_local->loc.inode = inode_ref (local->loc.inode);
+ uuid_copy (link_local->gfid, local->loc.inode->gfid);
+
+ dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
+ this, src_cached, dst_hashed,
+ &link_local->loc);
+ }
- local = frame->local;
- prev = cookie;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+err:
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
+
+
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink should not be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ /* TODO: delete files in background */
+
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
+
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
+
+ if (local->call_cnt == 0)
+ goto unwind;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_hashed, src_hashed->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (dst_cached
+ && (dst_cached != dst_hashed)
+ && (dst_cached != src_cached)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_cached, dst_cached->fops->unlink,
+ &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "rename on %s failed (%s)", prev->this->name,
- strerror (op_errno));
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
-
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+unwind:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+ if (xattr)
+ dict_unref (xattr);
- /* TODO: delete files in background */
+ dht_rename_done (frame, this);
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
+ return 0;
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
+cleanup:
+ if (xattr)
+ dict_unref (xattr);
+ dht_rename_cleanup (frame);
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
+ return 0;
+}
- if (local->call_cnt == 0)
- goto unwind;
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
+int
+dht_do_rename (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc);
- }
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+ local = frame->local;
+ this = frame->this;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc);
- }
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2);
- }
- return 0;
+ gf_log (this->name, GF_LOG_TRACE,
+ "renaming %s => %s (%s)",
+ local->loc.path, local->loc2.path, rename_subvol->name);
-unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_rename_cbk,
+ rename_subvol, rename_subvol->fops->rename,
+ &local->loc, &local->loc2, NULL);
- return 0;
+ return 0;
}
int
-dht_do_rename (call_frame_t *frame)
+dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
- this = frame->this;
+ local = frame->local;
+ prev = cookie;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "link/file on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ local->op_ret = -1;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ } else if (local->src_cached == prev->this) {
+ /* merge of attr returned only from linkfile creation */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ }
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == -1)
+ goto cleanup;
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
+ dht_do_rename (frame);
+ }
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ return 0;
- return 0;
+cleanup:
+ dht_rename_cleanup (frame);
+
+ return 0;
}
int
-dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
- int this_call_cnt = 0;
local = frame->local;
prev = cookie;
-
- if (op_ret == -1) {
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
gf_log (this->name, GF_LOG_DEBUG,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
+ "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->this->name,
+ strerror (op_errno));
local->op_ret = -1;
local->op_errno = op_errno;
}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == -1)
- goto unwind;
-
- dht_do_rename (frame);
- }
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename (frame);
return 0;
-unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+cleanup:
+ dht_rename_cleanup (frame);
return 0;
}
@@ -408,39 +761,54 @@ unwind:
int
dht_rename_create_links (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
- local = frame->local;
- this = frame->this;
+ local = frame->local;
+ this = frame->this;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (src_cached == dst_cached)
- goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ if (src_cached == dst_cached) {
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- if (src_cached != dst_hashed)
- call_cnt++;
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- local->call_cnt = call_cnt;
+ STACK_WIND (frame, dht_rename_unlink_links_cbk,
+ dst_hashed, dst_hashed->fops->unlink,
+ &local->loc2, 0, xattr);
+ return 0;
+ }
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- dht_linkfile_create (frame, dht_rename_links_cbk,
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ memcpy (local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create (frame, dht_rename_links_cbk, this,
src_cached, dst_hashed, &local->loc);
}
@@ -450,113 +818,107 @@ dht_rename_create_links (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_links_cbk,
src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xattr);
}
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
- }
-
- return 0;
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename (frame);
+ }
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
}
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
-
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
-
- if (S_ISDIR (oldloc->inode->st_mode)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- dht_rename_create_links (frame);
- }
-
- return 0;
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ src_hashed = dht_subvol_get_hashed (this, oldloc);
+ if (!src_hashed) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no subvolume in layout for path=%s",
+ oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached (this, oldloc->inode);
+ if (!src_cached) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed (this, newloc);
+ if (!dst_hashed) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached (this, newloc->inode);
+
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
+ oldloc->path, src_hashed->name, src_cached->name,
+ newloc->path, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ dht_rename_dir (frame, this);
+ } else {
+ local->op_ret = 0;
+ dht_rename_create_links (frame);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index df8b2047b..3fe96b1c7 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -26,459 +17,1013 @@
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
+#include "glusterfs-acl.h"
+
+#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
+ layout->list[i].start = srt; \
+ layout->list[i].stop = srt + chunk - 1; \
+ \
+ gf_log (this->name, GF_LOG_TRACE, \
+ "gave fix: %u - %u on %s for %s", \
+ layout->list[i].start, layout->list[i].stop, \
+ layout->list[i].xlator->name, path); \
+ } while (0)
+
+#define DHT_RESET_LAYOUT_RANGE(layout) do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++ ) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
+
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
+{
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
+
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
+
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
+
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
+
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
+
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
+}
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
- dht_local_t *local = NULL;
-
+ dht_local_t *local = NULL;
- local = frame->local;
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ local = frame->local;
+ local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+ local->op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
+
+ if (op_ret == 0)
+ err = 0;
+ else
+ err = op_errno;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
+ }
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ }
- return 0;
+ return 0;
}
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
-
-
- subvol = layout->list[i].xlator;
- this = frame->this;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ GF_VALIDATE_OR_GOTO ("", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, layout, err);
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ xattr = get_new_dict ();
+ if (!xattr) {
+ goto err;
+ }
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: (subvol %s) failed to extract disk layout",
+ loc->path, subvol->name);
+ goto err;
+ }
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to extract disk layout");
- goto err;
- }
+ ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: (subvol %s) failed to set xattr dictionary",
+ loc->path, subvol->name);
+ goto err;
+ }
+ disk_layout = NULL;
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set xattr dictionary");
- goto err;
- }
- disk_layout = NULL;
+ gf_log (this->name, GF_LOG_TRACE,
+ "setting hash range %u - %u (type %d) on subvolume %s for %s",
+ layout->list[i].start, layout->list[i].stop,
+ layout->type, subvol->name, loc->path);
- gf_log (this->name, GF_LOG_TRACE,
- "setting hash range %u - %u (type %d) on subvolume %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
+ dict_ref (xattr);
- dict_ref (xattr);
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+ STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
+ subvol, subvol->fops->setxattr,
+ loc, xattr, 0, NULL);
- dict_unref (xattr);
+ dict_unref (xattr);
- return 0;
+ return 0;
err:
- if (xattr)
- dict_destroy (xattr);
+ if (xattr)
+ dict_destroy (xattr);
- if (disk_layout)
- FREE (disk_layout);
+ GF_FREE (disk_layout);
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
- return 0;
+ dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
+ -1, ENOMEM, NULL);
+ return 0;
}
+int
+dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "writing the new range for all subvolumes");
+
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+}
int
dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is itself non existant.
- * !layout->list[i].stop would mean layout absent
- */
- continue;
- }
- missing_xattr++;
- }
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
+ }
+ missing_xattr++;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
- }
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ return 0;
+ }
- local->call_cnt = missing_xattr;
+ local->call_cnt = missing_xattr;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
- ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
- if (--missing_xattr == 0)
- break;
- }
- return 0;
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ }
+ }
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+}
+
+int
+dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ }
+
+ return 0;
}
int
+dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
+{
+ int missing_attr = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if (missing_attr == 0) {
+ dht_selfheal_dir_xattr (frame, loc, layout);
+ return 0;
+ }
+
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
+ local->call_cnt = missing_attr;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "setattr for %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid, NULL);
+ }
+ }
+
+ return 0;
+}
+
+int
dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if ((op_ret == 0) || (op_errno == EEXIST)) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
- }
- }
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- }
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
- return 0;
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
+ }
+ }
+
+ if (op_ret) {
+ gf_log (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "selfhealing directory %s failed: %s",
+ local->loc.path, strerror (op_errno));
+ goto out;
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+ }
+
+ return 0;
}
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+ data_t *acl_default = NULL;
+ data_t *acl_access = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ GF_ASSERT (xattr);
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+
+ if (!acl_default) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_DEFAULT xattr not present");
+ goto cont;
+ }
+ ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_DEFAULT xattr");
+cont:
+ acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+ if (!acl_access) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_ACCESS xattr not present");
+ goto out;
+ }
+ ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_ACCESS xattr");
+
+out:
+ return;
+}
int
dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+ dht_layout_t *layout, int force)
{
- int missing_dirs = 0;
- int i = 0;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
+ int missing_dirs = 0;
+ int i = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
+ return 0;
+ }
- local = frame->local;
- this = frame->this;
+ local->call_cnt = missing_dirs;
+ if (!uuid_is_null (local->gfid)) {
+ dict = dict_new ();
+ if (!dict)
+ return -1;
+
+ ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid in dict", loc->path);
+ } else if (local->params) {
+ /* Send the dictionary from higher layers directly */
+ dict = dict_ref (local->params);
+ }
+ /* Set acls */
+ if (local->xattr && dict)
+ dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+
+ if (!dict)
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict is NULL, need to make sure gfids are same");
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->mkdir,
+ loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, dict);
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
- }
+ if (dict)
+ dict_unref (dict);
- if (missing_dirs == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
- return 0;
- }
+ return 0;
+}
- local->call_cnt = missing_dirs;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc, local->stbuf.st_mode);
- }
- }
- return 0;
+int
+dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
+ if (ret == 0) {
+ start = (hashval % layout->cnt);
+ }
+
+ return start;
}
-void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+static inline int
+dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int cnt = 0;
- int err = 0;
-
- this = frame->this;
- conf = this->private;
-
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
- cnt++;
- }
- }
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+
+ It is important to note that it is safe
+ to race with mkdir() as self-heal and
+ mkdir are idempotent operations. Both will
+ strive to set the directory and layouts to
+ the same final state.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
+ }
+ }
/* no subvolume has enough space, but can't stop directory creation */
- if (!cnt) {
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
if (err == ENOSPC) {
layout->list[i].err = -1;
- cnt++;
+ count++;
}
}
}
- chunk = ((unsigned long) 0xffffffff) / cnt;
-
- start = 0;
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- layout->list[i].start = start;
- layout->list[i].stop = start + chunk - 1;
-
- start = start + chunk;
-
- gf_log (this->name, GF_LOG_TRACE,
- "gave fix: %u - %u on %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->list[i].xlator->name, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- break;
- }
- }
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-availbale). Else return count (available up bricks) */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
+
+ return count;
+}
+
+
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x,y) table[x*new->cnt+y]
+
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname (old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
}
}
+dht_layout_t *
+dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
+ loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new (this, priv->subvolume_cnt);
+ if (!new_layout)
+ goto done;
+
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
+
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
+ }
+
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
+
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
+
+done:
+ if (new_layout) {
+ /* Now that the new layout has all the proper layout, change the
+ inode context */
+ dht_layout_set (this, loc->inode, new_layout);
+
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref (this, local->layout);
+
+ local->layout = new_layout;
+ }
+
+ return local->layout;
+}
+
+
+void
+dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ xlator_t *this = NULL;
+ uint32_t chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int cnt = 0;
+ int err = 0;
+ int start_subvol = 0;
+
+ this = frame->this;
+
+ cnt = dht_get_layout_count (this, layout, 1);
+
+ chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
+
+ start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE (layout);
+ for (i = start_subvol; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == ENOENT) {
+ DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
+ cnt, loc->path);
+ if (--cnt == 0) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += chunk;
+ }
+ }
+
+ for (i = 0; i < start_subvol; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == ENOENT) {
+ DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
+ cnt, loc->path);
+ if (--cnt == 0) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += chunk;
+ }
+ }
+
+done:
+ return;
+}
+
int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+ dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
- int ret = -1;
- int i = -1;
- int overlaps = -1;
-
- this = frame->this;
- conf = this->private;
- local = frame->local;
-
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
-
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
+ local = frame->local;
- if (holes || missing || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
+ if (holes || overlaps) {
+ dht_selfheal_layout_new_directory (frame, loc, layout);
+ ret = 0;
+ }
- /* TODO: give a fix to these non-virgins */
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
- return ret;
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
}
int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_selfheal_new_directory (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- return 0;
+ dht_layout_sort_volname (layout);
+ dht_selfheal_layout_new_directory (frame, &local->loc, layout);
+ dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ return 0;
+}
+
+int
+dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+
+ local = frame->local;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
+
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
+ dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
+
+ return 0;
}
int
dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+ loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- ret = dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
-
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- misc = local->selfheal.misc;
-
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
-
- if (down) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
- }
+ dht_local_t *local = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dht_layout_anomalies (this, loc, layout,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (this, layout);
+
+ if (down) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%d subvolumes down -- not fixing", down);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- if (misc) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
- }
+ if (misc) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%d subvolumes have unrecoverable errors", misc);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ dht_layout_sort_volname (layout);
+ ret = dht_selfheal_dir_getafix (frame, loc, layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "not able to form layout for the directory");
- goto sorry_no_fix;
- }
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "not able to form layout for the directory");
+ goto sorry_no_fix;
+ }
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ dht_selfheal_dir_mkdir (frame, loc, layout, 0);
- return 0;
+ return 0;
sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish (frame, this, ret);
- return 0;
+ return 0;
}
int
dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+ loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- dht_local_t *local = NULL;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ local = frame->local;
- local = frame->local;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
+ ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+ return ret;
+}
- return 0;
+int
+dht_dir_attr_heal (void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+ call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || (subvol == dht_first_up_subvol (this)))
+ continue;
+ ret = syncop_setattr (subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret)
+ gf_log ("dht", GF_LOG_ERROR, "Failed to set uid/gid on"
+ " %s on %s subvol (%s)", local->loc.path,
+ subvol->name, strerror (errno));
+ }
+out:
+ return 0;
+}
+
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY (sync_frame);
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 000000000..70aac7710
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,758 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "statedump.h"
+#include "dht-common.h"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+ if (!prefix)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR (layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ sprintf (key, "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]){
+ sprintf (key, "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ sprintf (key, "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+
+ sprintf (key, "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d",
+ (int)conf->subvolume_status[i]);
+ }
+
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ if (conf ->du_stats) {
+ gf_proc_dump_write("du_stats.avail_percent", "%lf",
+ conf->du_stats->avail_percent);
+ gf_proc_dump_write("du_stats.avail_space", "%lu",
+ conf->du_stats->avail_space);
+ gf_proc_dump_write("du_stats.avail_inodes", "%lf",
+ conf->du_stats->avail_inodes);
+ gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
+ }
+
+ if (conf->last_stat_fetch.tv_sec)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch.tv_sec));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup (bricks);
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_log (this->name, GF_LOG_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+ int ret = -1;
+
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+ regex_t *re, gf_boolean_t *re_valid)
+{
+ char *temp_str;
+
+ if (dict_get_str (odict, name, &temp_str) != 0) {
+ if (strcmp(name,"rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str,"none")) {
+ return;
+ }
+
+ if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ }
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "compiling regex %s failed", temp_str);
+ }
+}
+
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp (temp_str, "auto")) {
+ if (!gf_string2boolean (temp_str, &search_unhashed)) {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured (%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
+ " lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ //return -1;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ percent, out);
+
+ GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
+ if (conf->defrag) {
+ GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+ options, bool, out);
+ }
+
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ ret = dht_decommissioned_remove (this, conf);
+ if (ret == -1)
+ goto out;
+ }
+
+ dht_init_regex (this, options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r (data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup (pattern_str);
+ pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+ 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize(pattern, &pattern_list->size)
+ == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize (num, &pattern_list->size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+ memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (pattern_list);
+ GF_FREE (dup_str);
+
+ return ret;
+}
+
+int
+dht_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+ "specified");
+ goto err;
+ }
+
+ if (uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+ "glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+ }
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp (temp_str, "auto"))
+ gf_string2boolean (temp_str, &conf->search_unhashed);
+ else
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+
+ GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+ err);
+
+ GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
+
+ GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+ err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+ uint32, err);
+
+ GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+ bool, err);
+
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+ == 0) {
+ if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse"
+ " rebalance-filter (%s)", temp_str);
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex (this, this->options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, this->options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+ LOCK_INIT (&conf->layout_lock);
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new (dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf (&conf->link_xattr_name, "%s.linkto", conf->xattr_name);
+ gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf->du_stats);
+
+ GF_FREE (conf->defrag);
+
+ GF_FREE (conf->xattr_name);
+ GF_FREE (conf->link_xattr_name);
+ GF_FREE (conf->wild_xattr_name);
+
+ GF_FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+ "on", "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes."
+ },
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description = "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ },
+ { .key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ },
+ { .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files."
+ },
+ { .key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."
+ },
+ { .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread."
+ },
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick."
+ },
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory."
+ },
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries."
+ },
+ { .key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed."
+ },
+ { .key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed."
+ },
+ { .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description = "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it."
+ },
+
+ /* NUFA option */
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* switch option */
+ { .key = {"pattern.switch.case"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index e004dee0d..fc0ca2f77 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,238 +14,76 @@
#include "config.h"
#endif
-/* TODO: add NS locking */
-
-#include "dht-common.c"
-
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-
-
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
-
- FREE (conf);
- }
-
- return;
-}
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- conf->search_unhashed = 0;
-
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->search_unhashed);
- }
-
- conf->unhashed_sticky_bit = 0;
-
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->disk_unit = 'p';
- conf->min_free_disk = 10;
-
- if (dict_get_str (this->options, "min-free-disk", &temp_str) == 0) {
- if (gf_string2percent (temp_str, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t));
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
-
- conf->gen = 1;
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- FREE (conf->du_stats);
-
- FREE (conf);
- }
-
- return -1;
-}
+#include "statedump.h"
+#include "dht-common.h"
+class_methods_t class_methods = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
-
- .stat = dht_stat,
- .chmod = dht_chmod,
- .chown = dht_chown,
- .fchown = dht_fchown,
- .fchmod = dht_fchmod,
- .fstat = dht_fstat,
- .utimens = dht_utimens,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+ .fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
-
-struct xlator_mops mops = {
+struct xlator_dumpops dumpops = {
+ .priv = dht_priv_dump,
+ .inodectx = dht_inodectx_dump,
};
struct xlator_cbks cbks = {
-// .release = dht_release,
+// .release = dht_release,
// .releasedir = dht_releasedir,
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
+;
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index c8ce7fdee..e934acdf0 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,16 +14,18 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
-int
+extern struct volume_options options[];
+
+int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
char is_linkfile = 0;
char is_dir = 0;
@@ -41,8 +34,8 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int i = 0;
call_frame_t *prev = NULL;
- int call_cnt = 0;
-
+ int call_cnt = 0;
+ int ret = 0;
conf = this->private;
@@ -50,58 +43,52 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
/* non-directory and not a linkfile */
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino,
- &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
goto out;
}
if (is_dir) {
call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->call_cnt = call_cnt;
local->inode = inode_ref (inode);
local->xattr = dict_ref (xattr);
- local->op_ret = 0;
- local->op_errno = 0;
+ local->op_ret = 0;
+ local->op_errno = 0;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
@@ -118,51 +105,52 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"linkfile not having link subvolume. path=%s",
loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
}
return 0;
out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
-
- return 0;
-
- err:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ if (!local->hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
+ inode, stbuf, xattr, postparent);
return 0;
}
int
nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xattr_req)
{
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
@@ -171,40 +159,26 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
-
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- if (is_revalidate (loc)) {
- layout = dht_layout_get (this, loc->inode);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate (loc)) {
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -213,530 +187,490 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
- op_errno = ESTALE;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->st_ino = loc->inode->ino;
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
-
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- conf->local_volume,
- conf->local_volume->fops->lookup,
- loc, local->xattr_req);
- }
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref (this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ /* Send it to only local volume */
+ STACK_WIND (frame, nufa_local_lookup_cbk,
+ (xlator_t *)conf->private,
+ ((xlator_t *)conf->private)->fops->lookup,
+ loc, local->xattr_req);
+ }
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret == -1)
- goto err;
-
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
-
- return 0;
-
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
int
nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- avail_subvol = conf->local_volume;
- if (dht_is_subvol_filled (this, conf->local_volume)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- conf->local_volume);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
}
-
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this,
+ (xlator_t *)conf->private,
+ local);
+ }
+
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
+ local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
+
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
-
+ loc, flags, mode, umask, fd, params);
+
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
-
- return 0;
- }
-
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
- return 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
+
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+
+ return 0;
+ }
+err:
+ WIPE (postparent);
+ WIPE (preparent);
+
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
}
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
/* Consider the disksize in consideration */
- avail_subvol = conf->local_volume;
- if (dht_is_subvol_filled (this, conf->local_volume)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- conf->local_volume);
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this,
+ (xlator_t *)conf->private,
+ local);
}
- if (avail_subvol != subvol) {
- /* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->mode = mode;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
- return 0;
- }
+ return 0;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
-void
-fini (xlator_t *this)
+gf_boolean_t
+same_first_part (char *str1, char term1, char *str2, char term2)
{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
}
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
+}
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
+static void
+nufa_find_local_brick (xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- FREE (conf);
+ if (!addr_match)
+ return;
+
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) &&
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
}
- return;
}
-int
-init (xlator_t *this)
+static void
+nufa_to_dht (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- conf->search_unhashed = 0;
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- if (dict_get_str (this->options, "lookup-unhashed",
- &temp_str) == 0) {
- gf_string2boolean (temp_str,
- &conf->search_unhashed);
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
+}
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+int
+nufa_find_local_subvol (xlator_t *this,
+ void (*fn) (xlator_t *each, void* data), void *data)
+{
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
+ "brick");
+ return -1;
}
- LOCK_INIT (&conf->subvolume_lock);
-
- conf->gen = 1;
-
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
- }
-
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
- local_volname = data->data;
- }
-
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->local_volume = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) {
+ gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
}
- }
- conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t));
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ candidate = parent;
+ trav = parent->parents;
}
- this->private = conf;
-
- return 0;
+ return ret;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
+int
+nufa_init (xlator_t *this)
+{
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {0, };
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
- if (conf->subvolumes)
- FREE (conf->subvolumes);
+ if ((data = dict_get (this->options, "local-volume-name"))) {
+ local_volname = data->data;
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- if (conf->du_stats)
- FREE (conf->du_stats);
+ else
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
- FREE (conf);
}
- return -1;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
+ }
+ return 0;
}
-struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .chmod = dht_chmod,
- .chown = dht_chown,
- .fchown = dht_fchown,
- .fchmod = dht_fchmod,
- .fstat = dht_fstat,
- .utimens = dht_utimens,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+class_methods_t class_methods = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
};
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
struct xlator_cbks cbks = {
-// .release = dht_release,
-// .releasedir = dht_releasedir,
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"lookup-unhashed"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
new file mode 100644
index 000000000..d3ea90ba8
--- /dev/null
+++ b/xlators/cluster/dht/src/switch.c
@@ -0,0 +1,904 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+#include "dht-mem-types.h"
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <fnmatch.h>
+#include <string.h>
+
+extern struct volume_options options[];
+
+struct switch_sched_array {
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
+};
+
+/* Select one of this struct based on the path's pattern match */
+struct switch_struct {
+ struct switch_struct *next;
+ struct switch_sched_array *array;
+ int32_t node_index; /* Index of the node in
+ this pattern. */
+ int32_t num_child; /* Total num of child nodes
+ with this pattern. */
+ char path_pattern[256];
+};
+
+/* TODO: all 'TODO's in dht.c holds good */
+/* This function should return child node as '*:subvolumes' is inserterd */
+
+static int32_t
+gf_switch_valid_child (xlator_t *this, const char *child)
+{
+ xlator_list_t *children = NULL;
+ int32_t ret = 0;
+
+ children = this->children;
+ while (children) {
+ if (!strcmp (child, children->xlator->name)) {
+ ret = 1;
+ break;
+ }
+ children = children->next;
+ }
+
+ return ret;
+}
+
+static xlator_t *
+get_switch_matching_subvol (const char *path, dht_conf_t *conf,
+ xlator_t *hashed_subvol)
+{
+ struct switch_struct *cond = NULL;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
+
+ cond = conf->private;
+ subvol = hashed_subvol;
+ if (!cond)
+ goto out;
+
+ pathname = gf_strdup (path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch (trav->path_pattern,
+ pathname, FNM_NOESCAPE) == 0) {
+ for (idx = 0; idx < trav->num_child; idx++) {
+ if (trav->array[idx].xl == hashed_subvol)
+ goto out;
+ }
+ idx = trav->node_index++;
+ trav->node_index %= trav->num_child;
+ subvol = trav->array[idx].xl;
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ GF_FREE (pathname);
+
+ return subvol;
+}
+
+
+int
+switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ call_frame_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ goto out;
+ }
+
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ }
+
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ if (!local->hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
+ inode, stbuf, xattr, NULL);
+ return 0;
+}
+
+int
+switch_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ cached_subvol = local->cached_subvol;
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate (loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "revalidate without cache. path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref (this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
+ * attribute, revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
+ conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* */
+ cached_subvol = get_switch_matching_subvol (loc->path, conf,
+ hashed_subvol);
+ if (cached_subvol == hashed_subvol) {
+ STACK_WIND (frame, dht_lookup_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->lookup,
+ loc, local->xattr_req);
+ } else {
+ STACK_WIND (frame, switch_local_lookup_cbk,
+ cached_subvol,
+ cached_subvol->fops->lookup,
+ loc, local->xattr_req);
+ }
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+switch_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
+ if (dht_is_subvol_filled (this, avail_subvol)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
+
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+
+ return 0;
+ }
+err:
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
+ if (dht_is_subvol_filled (this, avail_subvol)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
+ this, avail_subvol, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+void
+switch_fini (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_struct *prev = NULL;
+
+ conf = this->private;
+
+ if (conf) {
+ trav = (struct switch_struct *)conf->private;
+ conf->private = NULL;
+ while (trav) {
+ GF_FREE (trav->array);
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+ }
+
+ dht_fini(this);
+}
+
+int
+set_switch_pattern (xlator_t *this, dht_conf_t *conf,
+ const char *pattern_str)
+{
+ int flag = 0;
+ int idx = 0;
+ int index = 0;
+ int child_count = 0;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *child = NULL;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *dup_childs = NULL;
+ char *switch_str = NULL;
+ char *pattern = NULL;
+ char *childs = NULL;
+ char *option_string = NULL;
+ struct switch_struct *switch_buf = NULL;
+ struct switch_struct *switch_opt = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_sched_array *switch_buf_array = NULL;
+ xlator_list_t *trav_xl = NULL;
+
+ trav_xl = this->children;
+ while (trav_xl) {
+ index++;
+ trav_xl = trav_xl->next;
+ }
+ child_count = index;
+ switch_buf_array = GF_CALLOC ((index + 1),
+ sizeof (struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_buf_array)
+ goto err;
+
+ trav_xl = this->children;
+ index = 0;
+
+ while (trav_xl) {
+ switch_buf_array[index].xl = trav_xl->xlator;
+ switch_buf_array[index].eligible = 1;
+ trav_xl = trav_xl->next;
+ index++;
+ }
+
+ /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
+
+ /* Get the pattern for considering switch case.
+ "option block-size *avi:10MB" etc */
+ option_string = gf_strdup (pattern_str);
+ switch_str = strtok_r (option_string, ";", &tmp_str);
+ while (switch_str) {
+ dup_str = gf_strdup (switch_str);
+ switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt) {
+ GF_FREE (dup_str);
+ goto err;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ childs = strtok_r (NULL, ":", &tmp_str1);
+ if (strncmp (pattern, "*", 2) == 0) {
+ gf_log ("switch", GF_LOG_INFO,
+ "'*' pattern will be taken by default "
+ "for all the unconfigured child nodes,"
+ " hence neglecting current option");
+ switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
+ GF_FREE (dup_str);
+ continue;
+ }
+ memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
+ if (childs) {
+ dup_childs = gf_strdup (childs);
+ child = strtok_r (dup_childs, ",", &tmp);
+ while (child) {
+ if (gf_switch_valid_child (this, child)) {
+ idx++;
+ child = strtok_r (NULL, ",", &tmp);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s is not a subvolume of %s. "
+ "pattern can only be scheduled "
+ "only to a subvolume of %s",
+ child, this->name, this->name);
+ goto err;
+ }
+ }
+ GF_FREE (dup_childs);
+ child = strtok_r (childs, ",", &tmp1);
+ switch_opt->num_child = idx;
+ switch_opt->array = GF_CALLOC (1, (idx *
+ sizeof (struct switch_sched_array)),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ idx = 0;
+ while (child) {
+ for (index = 0; index < child_count; index++) {
+ if (strcmp (switch_buf_array[index].xl->name,
+ child) == 0) {
+ gf_log ("switch", GF_LOG_DEBUG,
+ "'%s' pattern will be "
+ "scheduled to \"%s\"",
+ switch_opt->path_pattern, child);
+ /*
+ if (switch_buf_array[index-1].considered) {
+ gf_log ("switch", GF_LOG_DEBUG,
+ "ambiguity found, exiting");
+ return -1;
+ }
+ */
+ switch_opt->array[idx].xl = switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ idx++;
+ break;
+ }
+ }
+ child = strtok_r (NULL, ",", &tmp1);
+ }
+ } else {
+ /* error */
+ gf_log ("switch", GF_LOG_ERROR,
+ "Check \"scheduler.switch.case\" "
+ "option in unify volume. Exiting");
+ goto err;
+ }
+ GF_FREE (dup_str);
+
+ /* Link it to the main structure */
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ switch_str = strtok_r (NULL, ";", &tmp_str);
+ }
+
+ /* Now, all the pattern based considerations done, so for all the
+ * remaining pattern, '*' to all the remaining child nodes
+ */
+ {
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ flag++;
+ }
+ if (!flag) {
+ gf_log ("switch", GF_LOG_ERROR,
+ "No nodes left for pattern '*'. Exiting");
+ goto err;
+ }
+ switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt)
+ goto err;
+
+ /* Add the '*' pattern to the array */
+ memcpy (switch_opt->path_pattern, "*", 2);
+ switch_opt->num_child = flag;
+ switch_opt->array =
+ GF_CALLOC (1,
+ flag * sizeof (struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ flag = 0;
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ gf_log ("switch", GF_LOG_DEBUG,
+ "'%s' pattern will be scheduled to \"%s\"",
+ switch_opt->path_pattern,
+ switch_buf_array[index].xl->name);
+ switch_opt->array[flag].xl =
+ switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ flag++;
+ }
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ }
+ /* */
+ conf->private = switch_buf;
+
+ return 0;
+err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
+ if (switch_buf) {
+ trav = switch_buf;
+ while (trav) {
+ GF_FREE (trav->array);
+ switch_opt = trav;
+ trav = trav->next;
+ GF_FREE (switch_opt);
+ }
+ }
+ return -1;
+}
+
+
+int32_t
+switch_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
+ conf = this->private;
+
+ data = dict_get (this->options, "pattern.switch.case");
+ if (data) {
+ /* TODO: */
+ ret = set_switch_pattern (this, conf, data->data);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ this->private = conf;
+ return 0;
+
+err:
+ dht_fini(this);
+ return -1;
+}
+
+
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
+
+
+struct xlator_fops fops = {
+ .lookup = switch_lookup,
+ .create = switch_create,
+ .mknod = switch_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = dht_forget
+};
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a2965..5c1364b7f 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module -avoid-version
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index 4bc7d5e20..19be1ed27 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
@@ -49,12 +39,14 @@ int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd)
goto out;
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (local == NULL) {
ret = -ENOMEM;
goto out;
}
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count,
+ gf_ha_mt_child_count);
if (local->state == NULL) {
ret = -ENOMEM;
goto out;
@@ -141,11 +133,17 @@ int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno)
}
}
}
- if (local->stub)
+ if (local->stub) {
call_stub_destroy (local->stub);
+ local->stub = NULL;
+ }
+
if (local->fd) {
- FREE (local->state);
+ GF_FREE (local->state);
+ local->state = NULL;
+
fd_unref (local->fd);
+ local->fd = NULL;
}
return 0;
}
@@ -164,7 +162,8 @@ int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode)
local = frame->local;
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (local == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
new file mode 100644
index 000000000..e5e97d237
--- /dev/null
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __HA_MEM_TYPES_H__
+#define __HA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_ha_mem_types_ {
+ gf_ha_mt_ha_local_t = gf_common_mt_end + 1,
+ gf_ha_mt_hafd_t,
+ gf_ha_mt_char,
+ gf_ha_mt_child_count,
+ gf_ha_mt_xlator_t,
+ gf_ha_mt_ha_private_t,
+ gf_ha_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index b938071ed..3eccb516b 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
@@ -41,6 +31,41 @@
* - do not alloc the call-stub in case only one subvol is up.
*/
+void
+ha_local_wipe (ha_local_t *local)
+{
+ if (local->stub) {
+ call_stub_destroy (local->stub);
+ local->stub = NULL;
+ }
+
+ if (local->state) {
+ GF_FREE (local->state);
+ local->state = NULL;
+ }
+
+ if (local->dict) {
+ dict_unref (local->dict);
+ local->dict = NULL;
+ }
+
+ loc_wipe (&local->loc);
+
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->inode) {
+ inode_unref (local->inode);
+ local->inode = NULL;
+ }
+
+ GF_FREE (local);
+ return;
+}
+
+
int
ha_forget (xlator_t *this,
inode_t *inode)
@@ -49,7 +74,7 @@ ha_forget (xlator_t *this,
char *state = NULL;
if (!inode_ctx_del (inode, this, &stateino)) {
state = ((char *)(long)stateino);
- FREE (state);
+ GF_FREE (state);
}
return 0;
@@ -63,8 +88,9 @@ ha_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -106,6 +132,7 @@ ha_lookup_cbk (call_frame_t *frame,
if (local->op_ret == -1 && op_ret == 0) {
local->op_ret = 0;
local->buf = *buf;
+ local->postparent = *postparent;
if (dict)
local->dict = dict_ref (dict);
}
@@ -127,7 +154,8 @@ ha_lookup_cbk (call_frame_t *frame,
local->op_errno,
inode,
&local->buf,
- ctx);
+ ctx,
+ &local->postparent);
if (inode)
inode_unref (inode);
if (ctx)
@@ -148,19 +176,33 @@ ha_lookup (call_frame_t *frame,
char *state = NULL;
xlator_t **children = NULL;
int ret = -1;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
children = pvt->children;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
child_count = pvt->child_count;
local->inode = inode_ref (loc->inode);
ret = inode_ctx_get (loc->inode, this, NULL);
if (ret) {
- state = CALLOC (1, child_count);
+ state = GF_CALLOC (1, child_count, gf_ha_mt_child_count);
+ if (state == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
} else
local->revalidate = 1;
@@ -178,6 +220,14 @@ ha_lookup (call_frame_t *frame,
xattr_req);
}
return 0;
+
+unwind:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -186,7 +236,7 @@ ha_stat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
int ret = -1;
@@ -229,135 +279,25 @@ err:
return 0;
}
- int32_t
-ha_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
int32_t
-ha_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_chmod_stub (frame, ha_chmod, loc, mode);
-
- STACK_WIND_COOKIE (frame,
- ha_chmod_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->chmod,
- loc,
- mode);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
- int32_t
-ha_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+ha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
}
- local = frame->local;
- local->stub = fop_fchmod_stub (frame, ha_fchmod, fd, mode);
-
- STACK_WIND_COOKIE (frame,
- ha_fchmod_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fchmod,
- fd,
- mode);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
return 0;
}
- int32_t
-ha_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
int32_t
-ha_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+ha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -368,49 +308,24 @@ ha_chown (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_chown_stub (frame, ha_chown, loc, uid, gid);
+ local->stub = fop_setattr_stub (frame, ha_setattr, loc, stbuf, valid);
- STACK_WIND_COOKIE (frame,
- ha_chown_cbk,
+ STACK_WIND_COOKIE (frame,
+ ha_setattr_cbk,
(void *)(long)local->active,
HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->chown,
- loc,
- uid,
- gid);
+ HA_ACTIVE_CHILD(this, local)->fops->setattr,
+ loc, stbuf, valid);
return 0;
err:
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
- int32_t
-ha_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+int32_t
+ha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -421,29 +336,29 @@ ha_fchown (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_fchown_stub (frame, ha_fchown, fd, uid, gid);
+ local->stub = fop_fsetattr_stub (frame, ha_fsetattr, fd, stbuf, valid);
STACK_WIND_COOKIE (frame,
- ha_fchown_cbk,
+ ha_setattr_cbk,
(void *)(long)local->active,
HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fchown,
- fd,
- uid,
- gid);
+ HA_ACTIVE_CHILD(this, local)->fops->fsetattr,
+ fd, stbuf, valid);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
- int32_t
+
+int32_t
ha_truncate_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = -1;
@@ -452,7 +367,8 @@ ha_truncate_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- buf);
+ prebuf,
+ postbuf);
}
return 0;
}
@@ -473,6 +389,11 @@ ha_truncate (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_truncate_cbk,
@@ -483,7 +404,7 @@ ha_truncate (call_frame_t *frame,
offset);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -493,7 +414,8 @@ ha_ftruncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = -1;
@@ -503,7 +425,7 @@ ha_ftruncate_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- buf);
+ prebuf, postbuf);
}
return 0;
}
@@ -524,6 +446,11 @@ ha_ftruncate (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_ftruncate_cbk,
@@ -534,58 +461,12 @@ ha_ftruncate (call_frame_t *frame,
offset);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-ha_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
+ local = frame->local;
+ frame->local = NULL;
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_utimens_stub (frame, ha_utimens, loc, tv);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- STACK_WIND_COOKIE (frame,
- ha_utimens_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->utimens,
- loc,
- tv);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ ha_local_wipe (local);
return 0;
}
@@ -624,6 +505,11 @@ ha_access (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_access_stub (frame, ha_access, loc, mask);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_access_cbk,
@@ -634,7 +520,12 @@ ha_access (call_frame_t *frame,
mask);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -645,7 +536,8 @@ ha_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *path)
+ const char *path,
+ struct iatt *sbuf)
{
int ret = -1;
@@ -655,7 +547,8 @@ ha_readlink_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- path);
+ path,
+ sbuf);
}
return 0;
}
@@ -686,7 +579,7 @@ ha_readlink (call_frame_t *frame,
size);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -697,8 +590,9 @@ ha_mknod_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -742,12 +636,13 @@ ha_mknod_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
local->stub->args.mknod.loc.inode,
- &local->buf);
+ &local->buf, &local->preparent,
+ &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -760,7 +655,9 @@ ha_mknod_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -798,6 +695,8 @@ ha_mknod_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -807,9 +706,11 @@ ha_mknod_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mknod.loc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.mknod.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -854,20 +755,46 @@ ha_mknod (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
@@ -884,6 +811,14 @@ ha_mknod (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->mknod,
loc, mode, rdev);
return 0;
+
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
@@ -894,8 +829,9 @@ ha_mkdir_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -931,12 +867,12 @@ ha_mkdir_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.mkdir.loc.inode,
- &local->buf);
+ local->stub->args.mkdir.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -949,7 +885,9 @@ ha_mkdir_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -983,6 +921,8 @@ ha_mkdir_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -992,9 +932,11 @@ ha_mkdir_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mkdir.loc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.mkdir.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1037,20 +979,46 @@ ha_mkdir (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!frame->local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
if (local->state[i]) {
@@ -1066,6 +1034,13 @@ ha_mkdir (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->mkdir,
loc, mode);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1073,13 +1048,15 @@ ha_unlink_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
}
return 0;
}
@@ -1100,6 +1077,11 @@ ha_unlink (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_unlink_stub (frame, ha_unlink, loc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_unlink_cbk,
@@ -1109,7 +1091,7 @@ ha_unlink (call_frame_t *frame,
loc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1118,7 +1100,9 @@ ha_rmdir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = -1;
@@ -1127,7 +1111,9 @@ ha_rmdir_cbk (call_frame_t *frame,
if (ret == 0) {
STACK_UNWIND (frame,
op_ret,
- op_errno);
+ op_errno,
+ preparent,
+ postparent);
}
return 0;
}
@@ -1147,6 +1133,11 @@ ha_rmdir (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_rmdir_stub (frame, ha_rmdir, loc);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_rmdir_cbk,
@@ -1156,7 +1147,7 @@ ha_rmdir (call_frame_t *frame,
loc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1168,8 +1159,9 @@ ha_symlink_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1205,12 +1197,12 @@ ha_symlink_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.symlink.loc.inode,
- &local->buf);
+ local->stub->args.symlink.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -1223,7 +1215,9 @@ ha_symlink_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1256,6 +1250,8 @@ ha_symlink_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -1265,10 +1261,11 @@ ha_symlink_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf);
+ local->stub->args.symlink.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1311,20 +1308,46 @@ ha_symlink (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->stub = fop_symlink_stub (frame, ha_symlink, linkname, loc);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
@@ -1342,6 +1365,12 @@ ha_symlink (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->symlink,
linkname, loc);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1350,14 +1379,19 @@ ha_rename_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent);
}
return 0;
}
@@ -1378,6 +1412,12 @@ ha_rename (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_rename_cbk,
(void *)(long)local->active,
@@ -1386,7 +1426,7 @@ ha_rename (call_frame_t *frame,
oldloc, newloc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1397,8 +1437,9 @@ ha_link_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1434,12 +1475,12 @@ ha_link_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.link.oldloc.inode,
- &local->buf);
+ local->stub->args.link.oldloc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -1452,7 +1493,9 @@ ha_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1485,6 +1528,8 @@ ha_link_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -1494,9 +1539,11 @@ ha_link_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.link.oldloc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.link.oldloc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1539,7 +1586,7 @@ ha_link (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
- int32_t ret = 0;
+ int32_t ret = 0, op_errno = 0;
uint64_t tmp_stateino = 0;
ret = inode_ctx_get (newloc->inode, this, &tmp_stateino);
@@ -1551,7 +1598,8 @@ ha_link (call_frame_t *frame,
if (stateino == NULL) {
gf_log (this->name, GF_LOG_ERROR,
"newloc->inode's ctx is NULL, returning EINVAL");
- STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL);
+ STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL, NULL,
+ NULL);
return 0;
}
@@ -1559,11 +1607,30 @@ ha_link (call_frame_t *frame,
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!frame->local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_link_stub (frame, ha_link, oldloc, newloc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
@@ -1582,6 +1649,11 @@ ha_link (call_frame_t *frame,
oldloc,
newloc);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int32_t
@@ -1592,7 +1664,9 @@ ha_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1640,6 +1714,8 @@ ha_create_cbk (call_frame_t *frame,
if (local->op_ret == -1) {
local->op_ret = 0;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
local->first_success = 1;
}
local->stub->args.create.flags &= (~O_EXCL);
@@ -1658,8 +1734,9 @@ ha_create_cbk (call_frame_t *frame,
call_stub_t *stub = local->stub;
STACK_UNWIND (frame, local->op_ret, local->op_errno,
stub->args.create.fd,
- stub->args.create.loc.inode, &local->buf);
- FREE (state);
+ stub->args.create.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
+ GF_FREE (state);
call_stub_destroy (stub);
return 0;
}
@@ -1695,6 +1772,7 @@ ha_create (call_frame_t *frame,
char *stateino = NULL;
xlator_t **children = NULL;
hafd_t *hafdp = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
@@ -1702,9 +1780,28 @@ ha_create (call_frame_t *frame,
children = pvt->children;
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd);
- local->state = CALLOC (1, child_count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->active = -1;
local->op_ret = -1;
local->op_errno = ENOTCONN;
@@ -1718,10 +1815,34 @@ ha_create (call_frame_t *frame,
}
}
/* FIXME handle active -1 */
- stateino = CALLOC (1, child_count);
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup(loc->path);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup(loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
LOCK_INIT (&hafdp->lock);
fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
@@ -1733,6 +1854,26 @@ ha_create (call_frame_t *frame,
children[local->active]->fops->create,
loc, flags, mode, fd);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+
+ if (stateino) {
+ GF_FREE (stateino);
+ stateino = NULL;
+ }
+
+ if (hafdp) {
+ GF_FREE (hafdp->fdstate);
+
+ GF_FREE (hafdp->path);
+
+ GF_FREE (hafdp);
+ }
+
+ return 0;
}
int32_t
@@ -1789,7 +1930,7 @@ int32_t
ha_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, int wbflags)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1798,6 +1939,7 @@ ha_open (call_frame_t *frame,
int cnt = 0, i, child_count = 0, ret = 0;
hafd_t *hafdp = NULL;
uint64_t tmp_stateino = 0;
+ int32_t op_errno = ENOMEM;
local = frame->local;
pvt = this->private;
@@ -1805,14 +1947,39 @@ ha_open (call_frame_t *frame,
child_count = pvt->child_count;
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
local->fd = fd;
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup (loc->path);
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup (loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
hafdp->active = -1;
if (pvt->pref_subvol == -1) {
hafdp->active = fd->inode->ino % child_count;
@@ -1833,12 +2000,33 @@ ha_open (call_frame_t *frame,
ha_open_cbk,
children[i],
children[i]->fops->open,
- loc, flags, fd);
+ loc, flags, fd, wbflags);
if (--cnt == 0)
break;
}
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, fd);
+ if (hafdp) {
+ if (hafdp->fdstate) {
+ GF_FREE (hafdp->fdstate);
+ hafdp->fdstate = NULL;
+ }
+
+ if (hafdp->path) {
+ GF_FREE (hafdp->path);
+ hafdp->path = NULL;
+ }
+
+ GF_FREE (hafdp);
+ }
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1849,7 +2037,7 @@ ha_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
+ struct iatt *stbuf,
struct iobref *iobref)
{
int ret = 0;
@@ -1885,6 +2073,11 @@ ha_readv (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_readv_cbk,
@@ -1896,7 +2089,12 @@ ha_readv (call_frame_t *frame,
offset);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -1906,7 +2104,8 @@ ha_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = 0;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
@@ -1915,7 +2114,8 @@ ha_writev_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- stbuf);
+ prebuf,
+ postbuf);
}
return 0;
}
@@ -1938,7 +2138,13 @@ ha_writev (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off, iobref);
+ local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off,
+ iobref);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_writev_cbk,
@@ -1952,7 +2158,12 @@ ha_writev (call_frame_t *frame,
iobref);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -1989,6 +2200,12 @@ ha_flush (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_flush_stub (frame, ha_flush, fd);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_flush_cbk,
(void *)(long)local->active,
@@ -1997,7 +2214,12 @@ ha_flush (call_frame_t *frame,
fd);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2007,7 +2229,9 @@ ha_fsync_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = 0;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
@@ -2036,6 +2260,12 @@ ha_fsync (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fsync_cbk,
(void *)(long)local->active,
@@ -2045,7 +2275,12 @@ ha_fsync (call_frame_t *frame,
flags);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2055,7 +2290,7 @@ ha_fstat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
int ret = 0;
@@ -2086,6 +2321,12 @@ ha_fstat (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fstat_stub (frame, ha_fstat, fd);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fstat_cbk,
(void *)(long)local->active,
@@ -2094,7 +2335,12 @@ ha_fstat (call_frame_t *frame,
fd);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2160,20 +2406,46 @@ ha_opendir (call_frame_t *frame,
int cnt = 0, i, child_count = 0, ret = 0;
hafd_t *hafdp = NULL;
uint64_t tmp_stateino = 0;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
children = pvt->children;
child_count = pvt->child_count;
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
local->fd = fd;
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup (loc->path);
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup (loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
LOCK_INIT (&hafdp->lock);
fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
@@ -2198,6 +2470,26 @@ ha_opendir (call_frame_t *frame,
}
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL);
+ ha_local_wipe (local);
+ if (hafdp) {
+ if (hafdp->fdstate) {
+ GF_FREE (hafdp->fdstate);
+ hafdp->fdstate = NULL;
+ }
+
+ if (hafdp->path) {
+ GF_FREE (hafdp->path);
+ hafdp->path = NULL;
+ }
+
+ GF_FREE (hafdp);
+ }
+ return 0;
}
int32_t
@@ -2239,7 +2531,14 @@ ha_getdents (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset, flag);
+ local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset,
+ flag);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_getdents_cbk,
(void *)(long)local->active,
@@ -2251,7 +2550,12 @@ ha_getdents (call_frame_t *frame,
flag);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2292,7 +2596,13 @@ ha_setdents (call_frame_t *frame,
}
local = frame->local;
- local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries, count);
+ local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries,
+ count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_setdents_cbk,
@@ -2305,7 +2615,12 @@ ha_setdents (call_frame_t *frame,
count);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2343,6 +2658,12 @@ ha_fsyncdir (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fsyncdir_cbk,
(void *)(long)local->active,
@@ -2352,7 +2673,12 @@ ha_fsyncdir (call_frame_t *frame,
flags);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2401,7 +2727,8 @@ ha_statfs (call_frame_t *frame,
/* The normal way of handling failover doesn't work here
* as loc->inode may be null in this case.
*/
- local = CALLOC (1, sizeof (*local));
+ local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2458,6 +2785,12 @@ ha_setxattr (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_setxattr_cbk,
(void *)(long)local->active,
@@ -2468,7 +2801,12 @@ ha_setxattr (call_frame_t *frame,
flags);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2509,6 +2847,12 @@ ha_getxattr (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_getxattr_cbk,
(void *)(long)local->active,
@@ -2518,7 +2862,12 @@ ha_getxattr (call_frame_t *frame,
name);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2557,6 +2906,11 @@ ha_xattrop (call_frame_t *frame,
local = frame->local;
local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_xattrop_cbk,
@@ -2568,7 +2922,12 @@ ha_xattrop (call_frame_t *frame,
dict);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, dict);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2604,6 +2963,11 @@ ha_fxattrop (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_fxattrop_cbk,
@@ -2615,7 +2979,12 @@ ha_fxattrop (call_frame_t *frame,
dict);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, dict);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2653,6 +3022,11 @@ ha_removexattr (call_frame_t *frame,
local = frame->local;
local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_removexattr_cbk,
@@ -2663,7 +3037,12 @@ ha_removexattr (call_frame_t *frame,
name);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2673,7 +3052,7 @@ ha_lk_setlk_unlck_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int cnt = 0;
@@ -2689,7 +3068,7 @@ ha_lk_setlk_unlck_cbk (call_frame_t *frame,
if (cnt == 0) {
stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
if (stub->args.lk.lock.l_type == F_UNLCK) {
STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock);
} else {
@@ -2706,7 +3085,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2738,7 +3117,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
}
if (i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame, 0, op_errno, &stub->args.lk.lock);
call_stub_destroy (stub);
return 0;
@@ -2762,7 +3141,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
cnt++;
}
if (cnt) {
- struct flock lock;
+ struct gf_flock lock;
lock = local->stub->args.lk.lock;
for (i = 0; i < child_count; i++) {
if (state[i]) {
@@ -2779,7 +3158,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
}
return 0;
} else {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame,
op_ret,
@@ -2796,7 +3175,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2813,7 +3192,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
prev_frame = cookie;
if (op_ret == 0) {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame, 0, 0, lock);
return 0;
@@ -2830,7 +3209,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
}
if (i == child_count) {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame, op_ret, op_errno, lock);
return 0;
@@ -2851,7 +3230,7 @@ ha_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2860,6 +3239,7 @@ ha_lk (call_frame_t *frame,
int child_count = 0, i = 0, cnt = 0, ret = 0;
xlator_t **children = NULL;
uint64_t tmp_hafdp = 0;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
@@ -2870,7 +3250,14 @@ ha_lk (call_frame_t *frame,
gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed");
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->active = -1;
local->op_ret = -1;
local->op_errno = ENOTCONN;
@@ -2878,12 +3265,24 @@ ha_lk (call_frame_t *frame,
hafdp = (hafd_t *)(long)tmp_hafdp;
if (local->active == -1) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock);
- local->state = CALLOC (1, child_count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
state = hafdp->fdstate;
LOCK (&hafdp->lock);
memcpy (local->state, state, child_count);
@@ -2931,6 +3330,14 @@ ha_lk (call_frame_t *frame,
lock);
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -2957,7 +3364,7 @@ ha_inodelk (call_frame_t *frame,
const char *volume,
loc_t *loc,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -3055,6 +3462,11 @@ ha_checksum (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_checksum_cbk,
@@ -3065,17 +3477,18 @@ ha_checksum (call_frame_t *frame,
flag);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
int32_t
-ha_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
+ha_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
{
int ret = 0;
@@ -3086,11 +3499,15 @@ ha_readdir_cbk (call_frame_t *frame,
}
int32_t
-ha_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off);
+
+int32_t
+ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off);
+int32_t
+ha_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, int whichop)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -3101,19 +3518,58 @@ ha_readdir (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_readdir_stub (frame, ha_readdir, fd, size, off);
- STACK_WIND_COOKIE (frame,
- ha_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdir,
- fd, size, off);
+ if (whichop == GF_FOP_READDIR)
+ local->stub = fop_readdir_stub (frame, ha_readdir, fd, size,
+ off);
+ else
+ local->stub = fop_readdirp_stub (frame, ha_readdirp, fd, size,
+ off);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
+ (void *)(long)local->active,
+ HA_ACTIVE_CHILD(this, local),
+ HA_ACTIVE_CHILD(this, local)->fops->readdir,
+ fd, size, off);
+ else
+ STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
+ (void *)(long)local->active,
+ HA_ACTIVE_CHILD(this, local),
+ HA_ACTIVE_CHILD(this, local)->fops->readdirp,
+ fd, size, off);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
return 0;
}
+
+int32_t
+ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off)
+{
+ ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
+ return 0;
+}
+
+int32_t
+ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off)
+{
+ ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
+ return 0;
+}
+
/* Management operations */
int32_t
@@ -3174,8 +3630,16 @@ ha_stats (call_frame_t *frame,
ha_private_t *pvt = NULL;
xlator_t **children = NULL;
int i = 0;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
- local = frame->local = CALLOC (1, sizeof (*local));
pvt = this->private;
children = pvt->children;
for (i = 0; i < pvt->child_count; i++) {
@@ -3184,8 +3648,8 @@ ha_stats (call_frame_t *frame,
}
if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->flags = flags;
@@ -3195,6 +3659,16 @@ ha_stats (call_frame_t *frame,
children[i]->mops->stats,
flags);
return 0;
+
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
+ return 0;
+
}
@@ -3258,20 +3732,27 @@ ha_getspec (call_frame_t *frame,
ha_private_t *pvt = NULL;
xlator_t **children = NULL;
int i = 0;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
- local = frame->local = CALLOC (1, sizeof (*local));
pvt = this->private;
children = pvt->children;
- local = frame->local = CALLOC (1, sizeof (*local));
for (i = 0; i < pvt->child_count; i++) {
if (pvt->state[i])
break;
}
if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->flags = flags;
local->pattern = (char *)key;
@@ -3282,6 +3763,15 @@ ha_getspec (call_frame_t *frame,
children[i]->mops->getspec,
key, flags);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
+ return 0;
+
}
int32_t
@@ -3299,8 +3789,8 @@ ha_closedir (xlator_t *this,
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- FREE (hafdp->fdstate);
- FREE (hafdp->path);
+ GF_FREE (hafdp->fdstate);
+ GF_FREE (hafdp->path);
LOCK_DESTROY (&hafdp->lock);
return 0;
}
@@ -3320,8 +3810,8 @@ ha_close (xlator_t *this,
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- FREE (hafdp->fdstate);
- FREE (hafdp->path);
+ GF_FREE (hafdp->fdstate);
+ GF_FREE (hafdp->path);
LOCK_DESTROY (&hafdp->lock);
return 0;
}
@@ -3392,6 +3882,25 @@ notify (xlator_t *this,
return 0;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_ha_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
int
init (xlator_t *this)
{
@@ -3399,6 +3908,7 @@ init (xlator_t *this)
xlator_list_t *trav = NULL;
int count = 0, ret = 0;
+
if (!this->children) {
gf_log (this->name,GF_LOG_ERROR,
"FATAL: ha should have one or more child defined");
@@ -3411,7 +3921,7 @@ init (xlator_t *this)
}
trav = this->children;
- pvt = CALLOC (1, sizeof (ha_private_t));
+ pvt = GF_CALLOC (1, sizeof (ha_private_t), gf_ha_mt_ha_private_t);
ret = dict_get_int32 (this->options, "preferred-subvolume",
&pvt->pref_subvol);
@@ -3426,7 +3936,8 @@ init (xlator_t *this)
}
pvt->child_count = count;
- pvt->children = CALLOC (count, sizeof (xlator_t*));
+ pvt->children = GF_CALLOC (count, sizeof (xlator_t*),
+ gf_ha_mt_xlator_t);
trav = this->children;
count = 0;
@@ -3436,7 +3947,7 @@ init (xlator_t *this)
trav = trav->next;
}
- pvt->state = CALLOC (1, count);
+ pvt->state = GF_CALLOC (1, count, gf_ha_mt_char);
this->private = pvt;
return 0;
}
@@ -3446,7 +3957,7 @@ fini (xlator_t *this)
{
ha_private_t *priv = NULL;
priv = this->private;
- FREE (priv);
+ GF_FREE (priv);
return;
}
@@ -3462,10 +3973,7 @@ struct xlator_fops fops = {
.symlink = ha_symlink,
.rename = ha_rename,
.link = ha_link,
- .chmod = ha_chmod,
- .chown = ha_chown,
.truncate = ha_truncate,
- .utimens = ha_utimens,
.create = ha_create,
.open = ha_open,
.readv = ha_readv,
@@ -3478,24 +3986,20 @@ struct xlator_fops fops = {
.removexattr = ha_removexattr,
.opendir = ha_opendir,
.readdir = ha_readdir,
+ .readdirp = ha_readdirp,
.getdents = ha_getdents,
.fsyncdir = ha_fsyncdir,
.access = ha_access,
.ftruncate = ha_ftruncate,
.fstat = ha_fstat,
.lk = ha_lk,
- .fchmod = ha_fchmod,
- .fchown = ha_fchown,
.setdents = ha_setdents,
.lookup_cbk = ha_lookup_cbk,
.checksum = ha_checksum,
.xattrop = ha_xattrop,
- .fxattrop = ha_fxattrop
-};
-
-struct xlator_mops mops = {
- .stats = ha_stats,
- .getspec = ha_getspec,
+ .fxattrop = ha_fxattrop,
+ .setattr = ha_setattr,
+ .fsetattr = ha_fsetattr,
};
struct xlator_cbks cbks = {
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index d47d96535..e2ed7eaa6 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
+#include "ha-mem-types.h"
+
typedef struct {
call_stub_t *stub;
int32_t op_ret, op_errno;
@@ -28,7 +20,9 @@ typedef struct {
char *state, *pattern;
dict_t *dict;
loc_t loc;
- struct stat buf;
+ struct iatt buf;
+ struct iatt postparent;
+ struct iatt preparent;
fd_t *fd;
inode_t *inode;
int32_t flags;
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a..a278b05e2 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module -avoid-version
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index 365eeb490..851397b68 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -256,14 +246,15 @@ verify_dir_and_assign_subvol (xlator_t *this,
goto out;
}
- tmp_map = CALLOC (1, sizeof (struct map_pattern));
+ tmp_map = GF_CALLOC (1, sizeof (struct map_pattern),
+ gf_map_mt_map_pattern);
tmp_map->xl = trav->xlator;
tmp_map->dir_len = strlen (directory);
/* make sure that the top level directory starts
* with '/' and ends without '/'
*/
- tmp_map->directory = strdup (directory);
+ tmp_map->directory = gf_strdup (directory);
if (directory[tmp_map->dir_len - 1] == '/') {
tmp_map->dir_len--;
}
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
new file mode 100644
index 000000000..3e89f4736
--- /dev/null
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MAP_MEM_TYPES_H__
+#define __MAP_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_map_mem_types_ {
+ gf_map_mt_map_private_t = gf_common_mt_end + 1,
+ gf_map_mt_map_local_t,
+ gf_map_mt_map_xlator_array,
+ gf_map_mt_map_pattern,
+ gf_map_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index 98d6b33b0..6150a33ce 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -36,87 +26,53 @@ map_stat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
}
-
static int32_t
-map_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+map_setattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost)
{
call_frame_t *prev = NULL;
prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
+ map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-static int32_t
-map_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
return 0;
}
-
static int32_t
-map_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+map_fsetattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost)
{
call_frame_t *prev = NULL;
prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
+ map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-static int32_t
-map_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
return 0;
}
@@ -126,14 +82,15 @@ map_truncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -143,31 +100,15 @@ map_ftruncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-map_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -189,9 +130,10 @@ map_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *path)
+ const char *path,
+ struct iatt *sbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno, path);
+ STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
return 0;
}
@@ -200,9 +142,11 @@ map_unlink_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -211,9 +155,11 @@ map_rmdir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -224,12 +170,16 @@ map_rename_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
@@ -242,12 +192,14 @@ map_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -273,13 +225,13 @@ map_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
+ struct iatt *stbuf,
struct iobref *iobref)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ map_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
return 0;
@@ -291,14 +243,15 @@ map_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -319,9 +272,11 @@ map_fsync_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -332,12 +287,12 @@ map_fstat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
@@ -473,7 +428,7 @@ map_lk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
STACK_UNWIND (frame, op_ret, op_errno, lock);
return 0;
@@ -527,12 +482,14 @@ map_newentry_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -548,12 +505,14 @@ map_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
return 0;
@@ -655,13 +614,14 @@ map_single_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
@@ -675,8 +635,9 @@ map_root_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
int callcnt = 0;
map_local_t *local = NULL;
@@ -779,9 +740,36 @@ map_single_readdir_cbk (call_frame_t *frame,
}
+int32_t
+map_single_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+{
+ call_frame_t *prev = NULL;
+ gf_dirent_t *orig_entry = NULL;
+
+ prev = cookie;
+
+ list_for_each_entry (orig_entry, &entries->list, list) {
+ map_itransform (this, prev->this, orig_entry->d_ino,
+ &orig_entry->d_ino);
+ orig_entry->d_stat.ia_ino = orig_entry->d_ino;
+ }
+ STACK_UNWIND (frame, op_ret, op_errno, entries);
+ return 0;
+}
+
int
map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
+ int op_ret, int op_errno, gf_dirent_t *orig_entries);
+
+int
+map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries);
+
+int
+map_generic_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ int whichop)
{
map_local_t *local = NULL;
gf_dirent_t entries;
@@ -816,6 +804,8 @@ map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
map_itransform (this, subvol, orig_entry->d_off,
&entry->d_off);
+ if (whichop == GF_FOP_READDIRP)
+ entry->d_stat.ia_ino = entry->d_ino;
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
@@ -841,9 +831,14 @@ done:
goto unwind;
}
- STACK_WIND (frame, map_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, 0);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_readdir_cbk, next_subvol,
+ next_subvol->fops->readdir, local->fd,
+ local->size, 0);
+ else
+ STACK_WIND (frame, map_readdirp_cbk, next_subvol,
+ next_subvol->fops->readdirp, local->fd,
+ local->size, 0);
return 0;
}
@@ -864,6 +859,26 @@ unwind:
}
+int
+map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
+ orig_entries, GF_FOP_READDIR);
+ return 0;
+}
+
+
+int
+map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
+ orig_entries, GF_FOP_READDIRP);
+ return 0;
+}
+
+
/* Management operations */
static int32_t
@@ -880,23 +895,6 @@ map_checksum_cbk (call_frame_t *frame,
}
-int32_t
-map_lock_notify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-map_lock_fnotify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
/* Fops starts here */
int32_t
@@ -928,21 +926,22 @@ map_stat (call_frame_t *frame,
return 0;
}
-
int32_t
-map_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+map_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ GF_VALIDATE_OR_GOTO ("map", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc->path, err);
+ GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
subvol = get_mapping_subvol_from_ctx (this, loc->inode);
if (!subvol) {
@@ -950,8 +949,8 @@ map_chmod (call_frame_t *frame,
goto err;
}
- STACK_WIND (frame, map_chmod_cbk, subvol,
- subvol->fops->chmod, loc, mode);
+ STACK_WIND (frame, map_setattr_cbk, subvol,
+ subvol->fops->setattr, loc, stbuf, valid);
return 0;
err:
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
@@ -960,18 +959,19 @@ map_chmod (call_frame_t *frame,
}
int32_t
-map_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+map_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ GF_VALIDATE_OR_GOTO ("map", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, fd, err);
+ GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
subvol = get_mapping_subvol_from_ctx (this, fd->inode);
if (!subvol) {
@@ -979,71 +979,8 @@ map_fchmod (call_frame_t *frame,
goto err;
}
- STACK_WIND (frame, map_fchmod_cbk, subvol,
- subvol->fops->fchmod, fd, mode);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_chown_cbk, subvol,
- subvol->fops->chown, loc, uid, gid);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fchown_cbk, subvol,
- subvol->fops->fchown, fd, uid, gid);
-
+ STACK_WIND (frame, map_fsetattr_cbk, subvol,
+ subvol->fops->fsetattr, fd, stbuf, valid);
return 0;
err:
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
@@ -1113,37 +1050,6 @@ map_ftruncate (call_frame_t *frame,
}
int32_t
-map_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_utimens_cbk, subvol,
- subvol->fops->utimens, loc, tv);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
map_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
@@ -1353,7 +1259,7 @@ int32_t
map_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, int wbflags)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1371,7 +1277,7 @@ map_open (call_frame_t *frame,
}
STACK_WIND (frame, map_open_cbk, subvol,
- subvol->fops->open, loc, flags, fd);
+ subvol->fops->open, loc, flags, fd, wbflags);
return 0;
err:
@@ -1854,7 +1760,7 @@ map_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1883,7 +1789,7 @@ map_lk (call_frame_t *frame,
int32_t
map_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1913,7 +1819,7 @@ map_inodelk (call_frame_t *frame, xlator_t *this,
int32_t
map_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -2231,7 +2137,8 @@ map_lookup (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
frame->local = local;
local->call_count = priv->child_count;
@@ -2283,7 +2190,8 @@ map_statfs (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
priv = this->private;
frame->local = local;
@@ -2335,7 +2243,8 @@ map_opendir (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
priv = this->private;
frame->local = local;
@@ -2359,11 +2268,8 @@ map_opendir (call_frame_t *frame,
int32_t
-map_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t yoff)
+map_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop)
{
int32_t op_errno = EINVAL;
xlator_t *subvol = NULL;
@@ -2386,14 +2292,19 @@ map_readdir (call_frame_t *frame,
}
/* Just one callback */
- STACK_WIND (frame, map_single_readdir_cbk, subvol,
- subvol->fops->readdir, fd, size, yoff);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_single_readdir_cbk, subvol,
+ subvol->fops->readdir, fd, size, yoff);
+ else
+ STACK_WIND (frame, map_single_readdirp_cbk, subvol,
+ subvol->fops->readdirp, fd, size, yoff);
return 0;
root_inode:
/* readdir on '/' */
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
if (!local) {
gf_log (this->name, GF_LOG_ERROR,
"memory allocation failed :(");
@@ -2411,8 +2322,12 @@ map_readdir (call_frame_t *frame,
map_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
- STACK_WIND (frame, map_readdir_cbk, xvol,
- xvol->fops->readdir, fd, size, xoff);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff);
+ else
+ STACK_WIND (frame, map_readdirp_cbk, xvol, xvol->fops->readdirp,
+ fd, size, xoff);
return 0;
err:
@@ -2422,6 +2337,24 @@ map_readdir (call_frame_t *frame,
}
+
+int32_t
+map_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff)
+{
+ map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIR);
+ return 0;
+}
+
+int32_t
+map_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff)
+{
+ map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ return 0;
+}
+
+
void
fini (xlator_t *this)
{
@@ -2432,22 +2365,40 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
tmp_map = trav_map;
trav_map = trav_map->next;
- FREE (tmp_map);
+ GF_FREE (tmp_map);
}
- FREE(priv);
+ GF_FREE(priv);
}
return;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_map_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
int
init (xlator_t *this)
{
@@ -2464,6 +2415,7 @@ init (xlator_t *this)
char *subvol_str = NULL;
char *map_xl = NULL;
+
if (!this->children) {
gf_log (this->name,GF_LOG_ERROR,
"FATAL: map should have one or more child defined");
@@ -2475,7 +2427,8 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- priv = CALLOC (1, sizeof (map_private_t));
+ priv = GF_CALLOC (1, sizeof (map_private_t),
+ gf_map_mt_map_private_t);
this->private = priv;
/* allocate xlator array */
@@ -2484,7 +2437,8 @@ init (xlator_t *this)
count++;
trav = trav->next;
}
- priv->xlarray = CALLOC (1, sizeof (struct map_xlator_array) * count);
+ priv->xlarray = GF_CALLOC (1, sizeof (struct map_xlator_array) * count,
+ gf_map_mt_map_xlator_array);
priv->child_count = count;
/* build xlator array */
@@ -2504,7 +2458,7 @@ init (xlator_t *this)
}
map_pair_str = strtok_r (pattern_string, ";", &tmp_str);
while (map_pair_str) {
- dup_map_pair = strdup (map_pair_str);
+ dup_map_pair = gf_strdup (map_pair_str);
dir_str = strtok_r (dup_map_pair, ":", &tmp_str1);
if (!dir_str) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2526,7 +2480,7 @@ init (xlator_t *this)
goto err;
}
- FREE (dup_map_pair);
+ GF_FREE (dup_map_pair);
map_pair_str = strtok_r (NULL, ";", &tmp_str);
}
@@ -2557,12 +2511,7 @@ struct xlator_fops fops = {
.create = map_create,
.stat = map_stat,
- .chmod = map_chmod,
- .chown = map_chown,
- .fchown = map_fchown,
- .fchmod = map_fchmod,
.fstat = map_fstat,
- .utimens = map_utimens,
.truncate = map_truncate,
.ftruncate = map_ftruncate,
.access = map_access,
@@ -2581,6 +2530,7 @@ struct xlator_fops fops = {
.lk = map_lk,
.opendir = map_opendir,
.readdir = map_readdir,
+ .readdirp = map_readdirp,
.fsyncdir = map_fsyncdir,
.symlink = map_symlink,
.unlink = map_unlink,
@@ -2597,9 +2547,8 @@ struct xlator_fops fops = {
.setdents = map_setdents,
.getdents = map_getdents,
.checksum = map_checksum,
-};
-
-struct xlator_mops mops = {
+ .setattr = map_setattr,
+ .fsetattr = map_fsetattr,
};
struct xlator_cbks cbks = {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index 72b4f5640..7703a543e 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,26 +1,17 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
#include "xlator.h"
+#include "map-mem-types.h"
struct map_pattern {
struct map_pattern *next;
@@ -46,7 +37,7 @@ typedef struct {
int32_t op_errno;
int call_count;
struct statvfs statvfs;
- struct stat stbuf;
+ struct iatt stbuf;
inode_t *inode;
dict_t *dict;
fd_t *fd;
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 180d0da1f..2d151422a 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -2,15 +2,19 @@
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = stripe.h
+noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 000000000..a83abdc72
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,675 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ if (local->xdata)
+ dict_unref (local->xdata);
+
+out:
+ return;
+}
+
+
+
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "memory allocation failed");
+ goto out;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "stripe aggregate dict set failed");
+ GF_FREE (size);
+ goto out;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+ goto out;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+ } else if (strcmp (key, GF_CONTENT_KEY)) {
+ /* No need to aggregate 'CONTENT' data */
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+out:
+ return 0;
+}
+
+
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, stripe_aggregate, dst);
+out:
+ return;
+}
+
+
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *sbuf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!buffer || !local || !local->xattr_list)
+ goto out;
+
+ sbuf = buffer;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ memcpy (buffer, xattr->xattr_value, len);
+ buffer += len;
+ *buffer++ = ' ';
+ }
+ }
+
+ *--buffer = '\0';
+ if (total)
+ *total = buffer - sbuf;
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!local || !local->xattr_list)
+ goto out;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+
+ if (xattr && xattr->xattr_value)
+ GF_FREE (xattr->xattr_value);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz)
+{
+ int32_t ret = -1, i = 0, len = 0;
+ dict_t *tmp1 = NULL, *tmp2 = NULL;
+ char *buf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (xattr_serz == NULL) {
+ goto out;
+ }
+
+ tmp2 = dict_new ();
+
+ if (tmp2 == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ ret = dict_reset (tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_reset failed (%s)",
+ strerror (-ret));
+ }
+
+ ret = dict_unserialize (xattr->xattr_value,
+ xattr->xattr_len,
+ &tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_unserialize failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ tmp1 = dict_copy (tmp2, tmp1);
+ if (tmp1 == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_copy failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ len = dict_serialized_length (tmp1);
+ if (len > 0) {
+ buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+ if (buf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_serialize (tmp1, buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s)", strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ *xattr_serz = buf;
+ }
+
+ ret = 0;
+out:
+ if (tmp1 != NULL) {
+ dict_unref (tmp1);
+ }
+
+ if (tmp2 != NULL) {
+ dict_unref (tmp2);
+ }
+
+ return ret;
+}
+
+
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz)
+{
+ int ret = -1;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+ char stripe_size_str[20] = {0,};
+ char *pathinfo_serz = NULL;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ goto out;
+ }
+
+ (void) snprintf (stripe_size_str, 20, "%ld",
+ (local->fctx) ? local->fctx->stripe_size : 0);
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+ + strlen (stripe_size_str) + 7;
+ local->xattr_total_len += (padding + 2);
+
+ pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+ gf_common_mt_char);
+ if (!pathinfo_serz)
+ goto out;
+
+ /* xlator info */
+ (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+ this->name, stripe_size_str);
+
+ ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot aggregate pathinfo list");
+ goto out;
+ }
+
+ *(pathinfo_serz + padding + tlen) = ')';
+ *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+ *xattr_serz = pathinfo_serz;
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+ struct stripe_options *trav = NULL;
+ uint64_t block_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+ GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+ LOCK (&priv->lock);
+ {
+ block_size = priv->block_size;
+ trav = priv->pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) {
+ block_size = trav->block_size;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ return block_size;
+}
+
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+ dict_t *dict)
+{
+ char key[256] = {0,};
+ data_t *data = NULL;
+ int32_t index = 0;
+ stripe_private_t *priv = NULL;
+
+ priv = this->private;
+
+
+ if (!local->fctx) {
+ local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+ gf_stripe_mt_stripe_fd_ctx_t);
+ if (!local->fctx) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
+ local->fctx->static_array = 0;
+ }
+ /* Stripe block size */
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-size");
+ goto out;
+ } else {
+ if (!local->fctx->stripe_size) {
+ local->fctx->stripe_size =
+ data_to_int64 (data);
+ }
+
+ if (local->fctx->stripe_size != data_to_int64 (data)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stripe-size mismatch in blocks");
+ local->xattr_self_heal_needed = 1;
+ }
+ }
+
+ /* Stripe count */
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ data = dict_get (dict, key);
+
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-count");
+ goto out;
+ }
+ if (!local->fctx->xl_array) {
+ local->fctx->stripe_count = data_to_int32 (data);
+ if (!local->fctx->stripe_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+ sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
+
+ if (!local->fctx->xl_array) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+ if (local->fctx->stripe_count != data_to_int32 (data)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr (%d != %d)",
+ local->fctx->stripe_count, data_to_int32 (data));
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* index */
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-index");
+ goto out;
+ }
+ index = data_to_int32 (data);
+ if (index > priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-index xattr (%d)", index);
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+ if (local->fctx->xl_array) {
+ if (!local->fctx->xl_array[index])
+ local->fctx->xl_array[index] = prev->this;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ data = dict_get(dict, key);
+ if (!data) {
+ /*
+ * The file was probably created prior to coalesce support.
+ * Assume non-coalesce mode for this file to maintain backwards
+ * compatibility.
+ */
+ gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+ "attr, assume non-coalesce mode");
+ local->fctx->stripe_coalesce = 0;
+ } else {
+ local->fctx->stripe_coalesce = data_to_int32(data);
+ }
+
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+ uint32_t stripe_count, uint32_t stripe_index,
+ uint32_t stripe_coalesce)
+{
+ char key[256] = {0,};
+ int32_t ret = -1;
+
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ ret = dict_set_int64 (dict, key, stripe_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ ret = dict_set_int32 (dict, key, stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ ret = dict_set_int32 (dict, key, stripe_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ ret = dict_set_int32(dict, key, stripe_coalesce);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req_dict", key);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+
+ if (gf_string2bytesize (num, &priv->block_size) != 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+
+}
+
+
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *num = NULL;
+ struct stripe_options *temp_stripeopt = NULL;
+ struct stripe_options *stripe_opt = NULL;
+
+ if (!this || !priv || !data)
+ goto out;
+
+ /* Get the pattern for striping.
+ "option block-size *avi:10MB" etc */
+ stripe_str = strtok_r (data, ",", &tmp_str);
+ while (stripe_str) {
+ dup_str = gf_strdup (stripe_str);
+ stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+ gf_stripe_mt_stripe_options);
+ if (!stripe_opt) {
+ goto out;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!num) {
+ num = pattern;
+ pattern = "*";
+ ret = set_default_block_size (priv, num);
+ if (ret)
+ goto out;
+ }
+ if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+ "%s. Should be atleast %llu bytes", num,
+ STRIPE_MIN_BLOCK_SIZE);
+ goto out;
+ }
+ if (stripe_opt->block_size % 512) {
+ gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+ " be a multiple of 512 bytes", num);
+ goto out;
+ }
+
+ memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "block-size : pattern %s : size %"PRId64,
+ stripe_opt->path_pattern, stripe_opt->block_size);
+
+ if (priv->pattern)
+ temp_stripeopt = NULL;
+ else
+ temp_stripeopt = priv->pattern;
+
+ stripe_opt->next = temp_stripeopt;
+
+ priv->pattern = stripe_opt;
+ stripe_opt = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+
+ GF_FREE (dup_str);
+
+ GF_FREE (stripe_opt);
+
+ return ret;
+}
+
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+ if (to->ia_size < from->ia_size)
+ to->ia_size = from->ia_size;
+ if (to->ia_mtime < from->ia_mtime)
+ to->ia_mtime = from->ia_mtime;
+ if (to->ia_ctime < from->ia_ctime)
+ to->ia_ctime = from->ia_ctime;
+ if (to->ia_atime < from->ia_atime)
+ to->ia_atime = from->ia_atime;
+ return 0;
+}
+
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+ size_t line_size = 0;
+ uint64_t stripe_num = 0;
+ off_t coalesced_offset = 0;
+
+ line_size = stripe_size * stripe_count;
+ stripe_num = offset / line_size;
+
+ coalesced_offset = (stripe_num * stripe_size) +
+ (offset % stripe_size);
+
+ return coalesced_offset;
+}
+
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index)
+{
+ uint64_t nr_full_stripe_chunks = 0, mod = 0;
+
+ if (!size)
+ return size;
+
+ /*
+ * Estimate the number of fully written stripes from the
+ * local file size. Each stripe_size chunk corresponds to
+ * a stripe.
+ */
+ nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
+ mod = size % stripe_size;
+
+ if (!mod) {
+ /*
+ * There is no remainder, thus we could have overestimated
+ * the size of the file in terms of chunks. Trim the number
+ * of chunks by the following stripe members and leave it
+ * up to those nodes to respond with a larger size (if
+ * necessary).
+ */
+ nr_full_stripe_chunks -= stripe_count -
+ (stripe_index + 1);
+ size = nr_full_stripe_chunks * stripe_size;
+ } else {
+ /*
+ * There is a remainder and thus we own the last chunk of the
+ * file. Add the preceding stripe members of the final stripe
+ * along with the remainder to calculate the exact size.
+ */
+ nr_full_stripe_chunks += stripe_index;
+ size = nr_full_stripe_chunks * stripe_size + mod;
+ }
+
+ return size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
new file mode 100644
index 000000000..e9ac9cf46
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __STRIPE_MEM_TYPES_H__
+#define __STRIPE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_stripe_mem_types_ {
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
+ gf_stripe_mt_stripe_fd_ctx_t,
+ gf_stripe_mt_char,
+ gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
+ gf_stripe_mt_xlator_t,
+ gf_stripe_mt_stripe_private_t,
+ gf_stripe_mt_stripe_options,
+ gf_stripe_mt_xattr_sort_t,
+ gf_stripe_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index 64c81a539..69b510e23 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,305 +1,182 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
* xlators/cluster/stripe:
- * Stripe translator, stripes the data accross its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
+ * Stripe translator, stripes the data across its child nodes,
+ * as per the options given in the volfile. The striping works
+ * fairly simple. It writes files at different offset as per
+ * calculation. So, 'ls -l' output at the real posix level will
+ * show file size bigger than the actual size. But when one does
* 'df' or 'du <file>', real size of the file on the server is shown.
*
* WARNING:
* Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-
-/* TODO:
- * 1. Implement basic self-heal ability to manage the basic backend
- * layout missmatch.
- *
+ * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
+ * very much necessary, or else, use it in combination with AFR, to have a
+ * backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
+#include "libxlator.h"
+#include "byte-order.h"
+#include "statedump.h"
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
- pathname = strdup (path);
- trav = opts;
-
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- free (pathname);
-
- return block_size;
-}
-
+struct volume_options options[];
-/*
- * stripe_common_cbk -
- */
int32_t
-stripe_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * stripe_stack_unwind_cbk - This function is used for all the _cbk without
- * any extra arguments (other than the minimum given)
- * This is called from functions like fsync,unlink,rmdir etc.
- *
- */
-int32_t
-stripe_stack_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- int32_t callcnt = 0;
+ int callcnt = -1;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
}
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->loc.path)
- loc_wipe (&local->loc);
- if (local->loc2.path)
- loc_wipe (&local->loc2);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ STRIPE_STACK_DESTROY (frame);
}
+out:
return 0;
}
-int32_t
-stripe_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-/**
- * stripe_stack_unwind_buf_cbk - This function is used for all the _cbk with
- * 'struct stat *buf' as extra argument (other than minimum)
- * This is called from functions like, chmod, fchmod, chown, fchown,
- * truncate, ftruncate, utimens etc.
- *
- * @cookie - this argument should be always 'xlator_t *' of child node
- */
-int32_t
-stripe_stack_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+int32_t
+stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
-
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this) {
- /* Always, pass the inode number of
- first child to the above layer */
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- }
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
- }
+ if (!frame || !frame->local || !cookie || !this) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
}
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- if (local->loc.path)
- loc_wipe (&local->loc);
- if (local->loc2.path)
- loc_wipe (&local->loc2);
+ prev = cookie;
+ local = frame->local;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
+ STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
+ prev->this->fops->setattr, &local->loc,
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+out:
return 0;
}
-/* In case of symlink, mknod, the file is created on just first node */
-int32_t
-stripe_common_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+int32_t
+stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
+ stripe_local_t *local)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
+ xlator_list_t *trav = NULL;
+ call_frame_t *rframe = NULL;
+ stripe_local_t *rlocal = NULL;
+ stripe_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
-/**
- * stripe_stack_unwind_inode_cbk - This is called by the function like,
- * link (), symlink (), mkdir (), mknod ()
- * This creates a inode for new inode. It keeps a list of all
- * the inodes received from the child nodes. It is used while
- * forwarding any fops to child nodes.
- *
- */
-int32_t
-stripe_stack_unwind_inode_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
+ if (!local || !this || !frame) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- local = frame->local;
+ if (!(IA_ISREG (local->stbuf.ia_type) ||
+ IA_ISDIR (local->stbuf.ia_type)))
+ return 0;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
+ priv = this->private;
+ trav = this->children;
+ rframe = copy_frame (frame);
+ if (!rframe) {
+ goto out;
+ }
+ rlocal = mem_get0 (this->local_pool);
+ if (!rlocal) {
+ goto out;
+ }
+ rframe->local = rlocal;
+ rlocal->call_count = priv->child_count;
+ loc_copy (&rlocal->loc, &local->loc);
+ memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
- if (local->stbuf.st_blksize == 0) {
- local->inode = inode;
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this) {
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- }
+ xdata = dict_new ();
+ if (!xdata)
+ goto out;
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid-req", local->loc.path);
+
+ while (trav) {
+ if (IA_ISREG (local->stbuf.ia_type)) {
+ STACK_WIND (rframe, stripe_sh_make_entry_cbk,
+ trav->xlator, trav->xlator->fops->mknod,
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
+ if (IA_ISDIR (local->stbuf.ia_type)) {
+ STACK_WIND (rframe, stripe_sh_make_entry_cbk,
+ trav->xlator, trav->xlator->fops->mkdir,
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
+ }
+ trav = trav->next;
}
- UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
+ if (xdata)
+ dict_unref (xdata);
+ return 0;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
- }
+out:
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
+ if (xdata)
+ dict_unref (xdata);
return 0;
}
-int32_t
-stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+
+int32_t
+stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int32_t callcnt = 0;
- dict_t *tmp_dict = NULL;
- inode_t *tmp_inode = NULL;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
prev = cookie;
local = frame->local;
@@ -307,89 +184,115 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
if (op_errno != ENOENT)
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
prev->this->name,
strerror (op_errno));
if (local->op_errno != ESTALE)
local->op_errno = op_errno;
- if ((op_errno == ENOTCONN) || (op_errno == ESTALE))
+ if (((op_errno != ENOENT) && (op_errno != ENOTCONN)) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
- /* TODO: bring in self-heal ability */
- /*
- * if (local->op_ret == 0) {
- * if (S_ISREG (local->stbuf.st_mode) ||
- * S_ISDIR (local->stbuf.st_mode))
- * local->entry_self_heal_needed = 1;
- * }
- */
- }
-
+ if (op_errno == ENOENT)
+ local->entry_self_heal_needed = 1;
+ }
+
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
- if (local->stbuf.st_blksize == 0) {
+ if (FIRST_CHILD(this) == prev->this) {
+ local->stbuf = *buf;
+ local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ if (local->xattr) {
+ stripe_aggregate_xattr (local->xdata,
+ local->xattr);
+ dict_unref (local->xattr);
+ local->xattr = NULL;
+ }
}
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- if (local->dict)
- dict_unref (local->dict);
- local->dict = dict_ref (dict);
- } else {
- if (!local->dict)
- local->dict = dict_ref (dict);
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
+ } else if (local->xattr) {
+ stripe_aggregate_xattr (local->xattr, xdata);
}
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ /* Make sure the gfid on all the nodes are same */
+ if (uuid_compare (local->ia_gfid, buf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on subvolume %s",
+ local->loc.path, prev->this->name);
}
}
}
UNLOCK (&frame->lock);
if (!callcnt) {
+ if (local->op_ret == 0 && local->entry_self_heal_needed &&
+ !uuid_is_null (local->loc.inode->gfid))
+ stripe_entry_self_heal (frame, this, local);
+
if (local->failed)
local->op_ret = -1;
- tmp_dict = local->dict;
- tmp_inode = local->inode;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->dict);
+ if (local->op_ret != -1) {
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
+ }
- if (tmp_inode)
- inode_unref (tmp_inode);
- if (tmp_dict)
- dict_unref (tmp_dict);
+ STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xdata,
+ &local->postparent);
}
-
+out:
return 0;
}
-
-/**
- * stripe_lookup -
- */
-int32_t
-stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+int32_t
+stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- char send_lookup_to_all = 0;
- int32_t op_errno = 1;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int64_t filesize = 0;
+ int ret = 0;
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -401,172 +304,136 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
+ loc_copy (&local->loc, loc);
- if ((!loc->inode->st_mode) || S_ISDIR (loc->inode->st_mode) ||
- S_ISREG (loc->inode->st_mode)) {
- send_lookup_to_all = 1;
- }
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
- if (send_lookup_to_all) {
- /* Everytime in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
- trav = trav->next;
- }
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, stripe_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
+ if (!ret && (filesize > priv->block_size))
+ dict_del (xdata, GF_CONTENT_KEY);
}
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * stripe_stat -
- */
-int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int send_lookup_to_all = 0;
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
+ if (ret)
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_lookup_to_all = 1;
-
- if (!send_lookup_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->stat,
- loc);
- trav = trav->next;
- }
+ /* Everytime in stripe lookup, all child nodes
+ should be looked up */
+ local->call_count = priv->child_count;
+ while (trav) {
+ STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
+ trav->xlator->fops->lookup, loc, xdata);
+ trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_chmod -
- */
int32_t
-stripe_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- int send_fop_to_all = 0;
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ local = frame->local;
- priv = this->private;
- trav = this->children;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ if (op_ret == 0) {
+ local->op_ret = 0;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod, loc, mode);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->stbuf = *buf;
+ }
+
+ local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
+ }
+ UNLOCK (&frame->lock);
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->chmod,
- loc, mode);
- trav = trav->next;
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
}
+
+ STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
return 0;
}
-
-/**
- * stripe_chown -
- */
int32_t
-stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
- gid_t gid)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -582,49 +449,48 @@ stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->chown, loc, uid, gid);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->chown,
- loc, uid, gid);
- trav = trav->next;
- }
+ while (trav) {
+ STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
+ trav->xlator->fops->stat, loc, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+
+err:
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_statfs_cbk -
- */
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
local = frame->local;
LOCK(&frame->lock);
@@ -651,32 +517,32 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
UNLOCK (&frame->lock);
-
+
if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
+ local->op_errno, &local->statvfs_buf, NULL);
}
-
+out:
return 0;
}
-
-/**
- * stripe_statfs -
- */
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
trav = this->children;
priv = this->private;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -688,28 +554,99 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_truncate -
- */
+
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *prebuf;
+ local->post_buf = *postbuf;
+ }
+
+ local->prebuf_blocks += prebuf->ia_blocks;
+ local->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
+ if (local->prebuf_size < prebuf->ia_size)
+ local->prebuf_size = prebuf->ia_size;
+
+ if (local->postbuf_size < postbuf->ia_size)
+ local->postbuf_size = postbuf->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- int send_fop_to_all = 0;
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -718,58 +655,157 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
+
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->truncate,
- loc, offset);
- trav = trav->next;
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *preop;
+ local->post_buf = *postop;
+ }
+
+ local->prebuf_blocks += preop->ia_blocks;
+ local->postbuf_blocks += postop->ia_blocks;
+
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
+ if (local->prebuf_size < preop->ia_size)
+ local->prebuf_size = preop->ia_size;
+ if (local->postbuf_size < postop->ia_size)
+ local->postbuf_size = postop->ia_size;
}
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_utimens -
- */
-int32_t
-stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
+int32_t
+stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -785,46 +821,175 @@ stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ if (!IA_ISDIR (loc->inode->ia_type) &&
+ !IA_ISREG (loc->inode->ia_type)) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, NULL);
+ return 0;
+ }
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->utimens, loc, tv);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->utimens,
- loc, tv);
- trav = trav->next;
- }
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
+ local->call_count = priv->child_count;
+ while (trav) {
+ STACK_WIND (frame, stripe_setattr_cbk,
+ trav->xlator, trav->xlator->fops->setattr,
+ loc, stbuf, valid, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
+stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
+ trav = trav->next;
+ }
+
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ local->stbuf.ia_blocks += buf->ia_blocks;
+ local->preparent.ia_blocks += preoldparent->ia_blocks;
+ local->postparent.ia_blocks += postoldparent->ia_blocks;
+ local->pre_buf.ia_blocks += prenewparent->ia_blocks;
+ local->post_buf.ia_blocks += postnewparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf.ia_size < buf->ia_size)
+ local->stbuf.ia_size = buf->ia_size;
+
+ if (local->preparent.ia_size < preoldparent->ia_size)
+ local->preparent.ia_size = preoldparent->ia_size;
+
+ if (local->postparent.ia_size < postoldparent->ia_size)
+ local->postparent.ia_size = postoldparent->ia_size;
+
+ if (local->pre_buf.ia_size < prenewparent->ia_size)
+ local->pre_buf.ia_size = prenewparent->ia_size;
+
+ if (local->post_buf.ia_size < postnewparent->ia_size)
+ local->post_buf.ia_size = postnewparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preparent,
+ &local->postparent, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
if (op_ret == -1) {
goto unwind;
}
@@ -832,34 +997,39 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
trav = this->children;
+ local->stbuf = *buf;
+ local->preparent = *preoldparent;
+ local->postparent = *postoldparent;
+ local->pre_buf = *prenewparent;
+ local->post_buf = *postnewparent;
+
local->op_ret = 0;
- local->stbuf = *buf;
local->call_count--;
- trav = trav->next; /* Skip first child */
+ trav = trav->next; /* Skip first child */
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
+ STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
- unwind:
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+unwind:
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
-/**
- * stripe_rename -
- */
+
int32_t
-stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -878,101 +1048,131 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
- local->inode = oldloc->inode;
loc_copy (&local->loc, oldloc);
loc_copy (&local->loc2, newloc);
local->call_count = priv->child_count;
-
+
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
frame->local = local;
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
-
-
-/**
- * stripe_access -
- */
int32_t
-stripe_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
+ prev = cookie;
+ local = frame->local;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+ local->op_ret = 0;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
-/**
- * stripe_readlink_cbk -
- */
-int32_t
-stripe_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path);
- return 0;
-}
-/**
- * stripe_readlink -
- */
int32_t
-stripe_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
- STACK_WIND (frame, stripe_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if (op_errno != ENOENT) {
+ local->failed = 1;
+ local->op_ret = op_ret;
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+ if (callcnt == 1) {
+ if (local->failed) {
+ op_errno = local->op_errno;
+ goto out;
+ }
+ STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
-
-/**
- * stripe_unlink -
- */
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -987,82 +1187,132 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
op_errno = ENOTCONN;
goto err;
}
-
- if (S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_cbk, trav->xlator,
- trav->xlator->fops->unlink, loc);
- } else {
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
+ /* Don't unlink a file if a node is down */
+ if (priv->nodes_down) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- loc);
- trav = trav->next;
- }
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
+ frame->local = local;
+ local->call_count = priv->child_count;
+ trav = trav->next; /* Skip the first child */
+
+ while (trav) {
+ STACK_WIND (frame, stripe_unlink_cbk,
+ trav->xlator, trav->xlator->fops->unlink,
+ loc, xflag, xdata);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ op_errno = EINVAL;
+ goto err;
+ }
+
if (op_ret == -1) {
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ goto err;
}
- trav = this->children;
local = frame->local;
+ local->op_ret = 0;
local->call_count--; /* First child successful */
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->rmdir, &local->loc);
- trav = trav->next;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+ local->preparent_size = preparent->ia_size;
+ local->postparent_size = postparent->ia_size;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+
+}
+
+int32_t
+stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ if (op_errno != ENOENT)
+ local->failed = 1;
+ }
}
+ UNLOCK (&frame->lock);
+ if (callcnt == 1) {
+ if (local->failed)
+ goto out;
+ STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rmdir, &local->loc,
+ local->flags, NULL);
+ }
+ return 0;
+out:
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_rmdir -
- */
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1080,68 +1330,45 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = loc->inode;
loc_copy (&local->loc, loc);
+ local->flags = flags;
local->call_count = priv->child_count;
-
- STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-
-/**
- * stripe_setxattr -
- */
-int32_t
-stripe_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ trav = trav->next; /* skip the first child */
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
+ while (trav) {
+ STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
+ trav->xlator->fops->rmdir, loc, flags, NULL);
+ trav = trav->next;
}
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags);
-
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
@@ -1151,11 +1378,11 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
-
+out:
return 0;
}
@@ -1165,22 +1392,31 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ call_frame_t *prev = NULL;
- LOCK (&frame->lock);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
+
+ LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_ret = -1;
local->op_errno = op_errno;
}
@@ -1195,141 +1431,239 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
}
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- */
int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int ret = 0;
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->failed = 1;
+ prev->this->name, strerror (op_errno));
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
local->op_errno = op_errno;
}
-
if (op_ret >= 0) {
local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
- /* Always, pass the inode number of first child
- to the above layer */
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this)
- local->stbuf.st_ino = buf->st_ino;
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
+ /* Can be used as a mechanism to understand if mknod
+ was successful in at least one place */
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
}
}
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
+ if (local->failed)
local->op_ret = -1;
- if ((local->op_ret != -1) && priv->xattr_supported) {
- /* Send a setxattr request to nodes where the
- files are created */
- int32_t index = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
-
- trav = this->children;
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
+ if ((local->op_ret == -1) && !uuid_is_null (local->ia_gfid)) {
+ /* ia_gfid set means, at least on one node 'mknod'
+ is successful */
local->call_count = priv->child_count;
-
+ trav = this->children;
while (trav) {
- dict = get_new_dict ();
- dict_ref (dict);
- /* TODO: check return value */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, index);
-
STACK_WIND (frame,
- stripe_mknod_ifreg_setxattr_cbk,
+ stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
- trav->xlator->fops->setxattr,
- &local->loc, dict, 0);
-
- dict_unref (dict);
- index++;
+ trav->xlator->fops->unlink,
+ &local->loc, 0, NULL);
trav = trav->next;
}
+ return 0;
+ }
+
+
+ if (local->op_ret != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
+
+ }
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
+ }
+out:
+ return 0;
+}
+
+
+int32_t
+stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
+ trav = this->children;
+
+ local->call_count--;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", local->loc.path);
+ }
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count, i,
+ priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to build xattr request");
+
} else {
- /* Create itself has failed.. so return
- without setxattring */
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ dict = local->xattr;
}
+
+ STACK_WIND (frame, stripe_mknod_ifreg_cbk,
+ trav->xlator, trav->xlator->fops->mknod,
+ &local->loc, local->mode, local->rdev, 0, dict);
+ trav = trav->next;
+ i++;
+
+ if (dict && need_unref)
+ dict_unref (dict);
}
-
+
return 0;
+
+out:
+
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
-/**
- * stripe_mknod -
- */
int32_t
+stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ int32_t i = 0;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1338,71 +1672,216 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
-
+
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
+ /* NOTE: on older kernels (older than 2.6.9),
+ creat() fops is sent as mknod() + open(). Hence handling
S_IFREG files is necessary */
if (priv->nodes_down) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"Some node down, returning EIO");
op_errno = EIO;
goto err;
}
-
+
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
- local->inode = loc->inode;
+ local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
/* Everytime in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_mknod_ifreg_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- loc, mode, rdev);
- trav = trav->next;
+
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
+
+ dict_copy (xdata, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
- /* This case is handled, no need to continue further. */
- return 0;
- }
+ STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, rdev, umask, dict);
+ if (dict && need_unref)
+ dict_unref (dict);
+ return 0;
+ }
- STACK_WIND (frame, stripe_common_inode_cbk,
+ STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
+ loc, mode, rdev, umask, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = 0;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+ STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_mkdir -
- */
int32_t
-stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+
+ local->call_count--; /* first child is successful */
+ trav = trav->next; /* skip first child */
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->op_ret = 0;
+
+ local->inode = inode_ref (inode);
+ local->stbuf = *buf;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ local->stbuf_size = buf->ia_size;
+ local->preparent_size = preparent->ia_size;
+ local->postparent_size = postparent->ia_size;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, &local->loc, local->mode,
+ local->umask, local->xdata);
+ trav = trav->next;
+ }
+ return 0;
+out:
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+
+}
+
+
+int
+stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1417,72 +1896,130 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
priv = this->private;
trav = this->children;
-
+
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
+ loc_copy (&local->loc, loc);
frame->local = local;
/* Everytime in stripe lookup, all child nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_inode_cbk,
- trav->xlator, trav->xlator->fops->mkdir,
- loc, mode);
- trav = trav->next;
- }
+ STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_symlink -
- */
int32_t
-stripe_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
+stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
}
- /* send symlink to only first node */
- STACK_WIND (frame, stripe_common_inode_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc);
+ prev = cookie;
+ local = frame->local;
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = 0;
+
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
+ if (FIRST_CHILD(this) == prev->this) {
+ local->inode = inode_ref (inode);
+ local->stbuf = *buf;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+ }
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+ STRIPE_STACK_UNWIND (link, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_link -
- */
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -1503,49 +2040,45 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto err;
}
- if (S_ISREG (oldloc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_inode_cbk,
+ /* Everytime in stripe lookup, all child
+ nodes should be looked up */
+ while (trav) {
+ STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Everytime in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_inode_cbk,
- trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
- trav = trav->next;
- }
+ oldloc, newloc, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
- fd_t *lfd = NULL;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
@@ -1555,47 +2088,78 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
- fd_unref (lfd);
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
+ local->fd, local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- * stripe_create_setxattr_cbk -
- */
int32_t
-stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- fd_t *lfd = NULL;
+ int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
- int32_t callcnt = 0;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
local->op_errno = op_errno;
}
+
+ if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
+ local->op_ret = op_ret;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
}
UNLOCK (&frame->lock);
if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
if (local->op_ret == -1) {
local->call_count = priv->child_count;
trav = this->children;
@@ -1604,165 +2168,180 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
-
+
return 0;
}
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
- fd_unref (lfd);
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
+ }
+
+ /* Create itself has failed.. so return
+ without setxattring */
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret,
+ local->op_errno, local->fd,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- * stripe_create_cbk -
- */
+
+
int32_t
-stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct stat *buf)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- fd_t *lfd = NULL;
- stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ loc_t *loc = NULL;
+ int32_t need_unref = 0;
+ int32_t ret = -1;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
priv = this->private;
local = frame->local;
+ trav = this->children;
+ loc = &local->loc;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
-
- /* Always, pass the inode number of first
- child to the above layer */
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this)
- local->stbuf.st_ino = buf->st_ino;
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
- }
+ --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
+ local->op_ret = 0;
+ /* Get the mapping in inode private */
+ /* Get the stat buf right */
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+
+ if (local->failed)
+ local->op_ret = -1;
- /* */
- if (local->op_ret >= 0) {
- fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (fctx) {
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
- }
- }
+ if (local->op_ret == -1) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_create_fail_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ &local->loc, 0, NULL);
+ return 0;
+ }
- if ((local->op_ret != -1) &&
- local->stripe_size && priv->xattr_supported) {
- /* Send a setxattr request to nodes where
- the files are created */
- int ret = 0;
- int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
-
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
- local->call_count = priv->child_count;
-
- for (i = 0; i < priv->child_count; i++) {
- dict = get_new_dict ();
- dict_ref (dict);
-
- /* TODO: check return values */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, i);
-
- STACK_WIND (frame, stripe_create_setxattr_cbk,
- priv->xl_array[i],
- priv->xl_array[i]->fops->setxattr,
- &local->loc, dict, 0);
-
- dict_unref (dict);
+ /* Send a setxattr request to nodes where the
+ files are created */
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
}
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
} else {
- /* Create itself has failed.. so return
- without setxattring */
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
-
- fd_unref (lfd);
+ dict = local->xattr;
}
+
+ STACK_WIND (frame, stripe_create_cbk, trav->xlator,
+ trav->xlator->fops->create, &local->loc,
+ local->flags, local->mode, local->umask, local->fd,
+ dict);
+ trav = trav->next;
+ if (need_unref && dict)
+ dict_unref (dict);
+ i++;
}
-
+
+out:
return 0;
}
+
/**
- * stripe_create - If a block-size is specified for the 'name', create the
+ * stripe_create - If a block-size is specified for the 'name', create the
* file in all the child nodes. If not, create it in only first child.
*
* @name- complete path of the file to be created.
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
+ int ret = 0;
+ int need_unref = 0;
+ int i = 0;
+ dict_t *dict = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
@@ -1770,121 +2349,86 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
flags &= ~O_APPEND;
if (priv->first_child_down || priv->nodes_down) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"First node down, returning EIO");
op_errno = EIO;
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
- local->inode = loc->inode;
+ local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, loc, flags, mode, fd);
- trav = trav->next;
- }
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-/**
- * stripe_open_cbk -
- */
-int32_t
-stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- fd_t *lfd = NULL;
+ /* Send a setxattr request to nodes where the
+ files are created */
- local = frame->local;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
+ dict_copy (xdata, dict);
- if (op_ret == -1) {
- local->failed = 1;
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- FREE (local->fctx->xl_array);
- FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
- fd_unref (lfd);
+ STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create, loc, flags, mode,
+ umask, fd, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
- }
return 0;
+err:
+ STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
-
-/**
- * stripe_getxattr_cbk -
- */
int32_t
-stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
- fd_t *lfd = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- prev = (call_frame_t *)cookie;
- priv = this->private;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
LOCK (&frame->lock);
@@ -1892,145 +2436,39 @@ stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
+ prev->this->name, strerror (op_errno));
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
- goto unlock;
- }
-
- if (!local->fctx) {
- local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array =
- CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *));
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
+ local->op_errno = op_errno;
}
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array)
- local->fctx->xl_array[index] = prev->this;
- local->entry_count++;
- local->op_ret = 0;
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
}
- unlock:
UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->op_ret)
- goto err;
- if (local->entry_count != local->fctx->stripe_count) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
+ if (!callcnt) {
+ if (local->failed)
local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd);
- trav = trav->next;
- }
+ STRIPE_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd, xdata);
}
-
- return 0;
- err:
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd);
- fd_unref (lfd);
-
+out:
return 0;
}
+
/**
- * stripe_open -
+ * stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2052,7 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2063,97 +2501,76 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->fd = fd_ref (fd);
frame->local = local;
- local->inode = loc->inode;
loc_copy (&local->loc, loc);
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- while (trav) {
- STACK_WIND (frame, stripe_open_getxattr_cbk,
- trav->xlator, trav->xlator->fops->getxattr,
- loc, NULL);
- trav = trav->next;
- }
- } else {
- local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
-
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open,
- &local->loc, local->flags, local->fd);
- trav = trav->next;
- }
- }
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
+ while (trav) {
+ STACK_WIND (frame, stripe_open_cbk, trav->xlator,
+ trav->xlator->fops->open,
+ &local->loc, local->flags, local->fd,
+ xdata);
+ trav = trav->next;
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_opendir_cbk -
- */
+
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
- stripe_local_t *local = frame->local;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_ret = -1;
- local->failed = 1;
local->op_errno = op_errno;
}
-
- if (op_ret >= 0)
+
+ if (op_ret >= 0)
local->op_ret = op_ret;
}
UNLOCK (&frame->lock);
if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
+ STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
}
-
+out:
return 0;
}
-/**
- * stripe_opendir -
- */
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2170,118 +2587,61 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
- local->inode = loc->inode;
- local->fd = fd;
local->call_count = priv->child_count;
+ local->fd = fd_ref (fd);
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-/**
- * stripe_getxattr_cbk -
- */
-int32_t
-stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *value)
-{
- STACK_UNWIND (frame, op_ret, op_errno, value);
- return 0;
-}
-
-
-/**
- * stripe_getxattr -
- */
-int32_t
-stripe_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- STACK_WIND (frame, stripe_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_removexattr -
- */
-int32_t
-stripe_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-
-/**
- * stripe_lk_cbk -
- */
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
}
- if (op_ret == 0 && local->op_ret == -1) {
- /* First successful call, copy the *lock */
- local->op_ret = 0;
- local->lock = *lock;
+ if (op_ret >= 0) {
+ if (FIRST_CHILD(this) == prev->this) {
+ /* First successful call, copy the *lock */
+ local->op_ret = op_ret;
+ local->lock = *lock;
+ }
}
}
UNLOCK (&frame->lock);
@@ -2289,24 +2649,21 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
if (local->failed)
local->op_ret = -1;
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->lock);
+ STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
+ local->op_errno, &local->lock, NULL);
}
+out:
return 0;
}
-
-/**
- * stripe_lk -
- */
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2317,77 +2674,75 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
-
local->call_count = priv->child_count;
-
+
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_writedir -
- */
+
int32_t
-stripe_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dir_entry_t *entries, int32_t count)
+stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->setdents, fd, flags, entries,
- count);
- trav = trav->next;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+out:
return 0;
}
-
-/**
- * stripe_flush -
- */
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2407,7 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2415,73 +2770,98 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->op_ret = -1;
frame->local = local;
local->call_count = priv->child_count;
-
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
-/**
- * stripe_fsync -
- */
+
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
- trav = trav->next;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *prebuf;
+ local->post_buf = *postbuf;
+ }
+ local->prebuf_blocks += prebuf->ia_blocks;
+ local->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
+ if (local->prebuf_size < prebuf->ia_size)
+ local->prebuf_size = prebuf->ia_size;
+
+ if (local->postbuf_size < postbuf->ia_size)
+ local->postbuf_size = postbuf->ia_size;
+ }
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
return 0;
}
-
-/**
- * stripe_fstat -
- */
int32_t
-stripe_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2493,82 +2873,107 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-
-/**
- * stripe_fchmod -
- */
-int32_t
-stripe_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+int32_t
+stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (FIRST_CHILD(this) == prev->this)
+ local->stbuf = *buf;
+
+ local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ }
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = fd->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fchmod, fd, mode);
- trav = trav->next;
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+
+ STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
return 0;
}
-
-/**
- * stripe_fchown -
- */
-int32_t
-stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
- gid_t gid)
+int32_t
+stripe_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2580,39 +2985,44 @@ stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fchown, fd, uid, gid);
+ STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_ftruncate -
- */
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2620,37 +3030,115 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_fsyncdir -
- */
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2666,7 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2676,29 +3164,114 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
local->call_count = priv->child_count;
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
-/**
- * stripe_single_readv_cbk - This function is used as return fn, when the
- * file name doesn't match the pattern specified for striping.
- */
int32_t
-stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ int32_t i = 0;
+ int32_t callcnt = 0;
+ int32_t count = 0;
+ stripe_local_t *local = NULL;
+ struct iovec *vec = NULL;
+ struct iatt tmp_stbuf = {0,};
+ struct iobref *tmp_iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ op_ret = 0;
+
+ /* Keep extra space for filling in '\0's */
+ vec = GF_CALLOC ((local->count * 2), sizeof (struct iovec),
+ gf_stripe_mt_iovec);
+ if (!vec) {
+ op_ret = -1;
+ goto done;
+ }
+
+ for (i = 0; i < local->wind_count; i++) {
+ if (local->replies[i].op_ret) {
+ memcpy ((vec + count), local->replies[i].vector,
+ (local->replies[i].count * sizeof (struct iovec)));
+ count += local->replies[i].count;
+ op_ret += local->replies[i].op_ret;
+ }
+ if ((local->replies[i].op_ret <
+ local->replies[i].requested_size) &&
+ (local->stbuf_size > (local->offset + op_ret))) {
+ /* Fill in 0s here */
+ vec[count].iov_len =
+ (local->replies[i].requested_size -
+ local->replies[i].op_ret);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
+ if (!iobuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto done;
+ }
+ memset (iobuf->ptr, 0, vec[count].iov_len);
+ vec[count].iov_base = iobuf->ptr;
+
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
+ op_ret += vec[count].iov_len;
+ count++;
+ }
+ GF_FREE (local->replies[i].vector);
+ }
+
+ /* FIXME: notice that st_ino, and st_dev (gen) will be
+ * different than what inode will have. Make sure this doesn't
+ * cause any bugs at higher levels */
+ memcpy (&tmp_stbuf, &local->replies[0].stbuf,
+ sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+
+ done:
+ GF_FREE (local->replies);
+ tmp_iobref = local->iobref;
+ STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
+ count, &tmp_stbuf, tmp_iobref, NULL);
+
+ iobref_unref (tmp_iobref);
+ GF_FREE (vec);
+ }
+out:
return 0;
}
@@ -2707,116 +3280,155 @@ stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* to above layer after putting it in a single vector.
*/
int32_t
-stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- stripe_local_t *local = frame->local;
+ int32_t final_count = 0;
+ int32_t need_to_check_proper_size = 0;
+ call_frame_t *mframe = NULL;
+ stripe_local_t *mlocal = NULL;
+ stripe_local_t *local = NULL;
+ struct iovec *final_vec = NULL;
+ struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
+ struct iobref *tmp_iobref = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
- index = local->node_index;
- main_frame = local->orig_frame;
- main_local = main_frame->local;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto end;
+ }
+
+ local = frame->local;
+ index = local->node_index;
+ prev = cookie;
+ mframe = local->orig_frame;
+ if (!mframe)
+ goto out;
- LOCK (&main_frame->lock);
+ mlocal = mframe->local;
+ if (!mlocal)
+ goto out;
+
+ fctx = mlocal->fctx;
+
+ LOCK (&mframe->lock);
{
- main_local->replies[index].op_ret = op_ret;
- main_local->replies[index].op_errno = op_errno;
+ mlocal->replies[index].op_ret = op_ret;
+ mlocal->replies[index].op_errno = op_errno;
+ mlocal->replies[index].requested_size = local->readv_size;
if (op_ret >= 0) {
- main_local->replies[index].stbuf = *stbuf;
- main_local->replies[index].count = count;
- main_local->replies[index].vector =
- iov_dup (vector, count);
+ mlocal->replies[index].stbuf = *stbuf;
+ mlocal->replies[index].count = count;
+ mlocal->replies[index].vector = iov_dup (vector, count);
+
+ correct_file_size(stbuf, fctx, prev);
+
+ if (local->stbuf_size < stbuf->ia_size)
+ local->stbuf_size = stbuf->ia_size;
+ local->stbuf_blocks += stbuf->ia_blocks;
- if (!main_local->iobref)
- main_local->iobref = iobref_new ();
- iobref_merge (main_local->iobref, iobref);
+ if (!mlocal->iobref)
+ mlocal->iobref = iobref_new ();
+ iobref_merge (mlocal->iobref, iobref);
}
- callcnt = ++main_local->call_count;
+ callcnt = ++mlocal->call_count;
}
- UNLOCK(&main_frame->lock);
-
- if (callcnt == main_local->wind_count) {
- int32_t final_count = 0;
- struct iovec *final_vec = NULL;
- struct stat tmp_stbuf = {0,};
- struct iobref *iobref = NULL;
+ UNLOCK(&mframe->lock);
+ if (callcnt == mlocal->wind_count) {
op_ret = 0;
- memcpy (&tmp_stbuf, &main_local->replies[0].stbuf,
- sizeof (struct stat));
- for (index=0; index < main_local->wind_count; index++) {
- /* TODO: check whether each stripe returned 'expected'
- * number of bytes
- */
- if (main_local->replies[index].op_ret == -1) {
+
+ for (index=0; index < mlocal->wind_count; index++) {
+ /* check whether each stripe returned
+ * 'expected' number of bytes */
+ if (mlocal->replies[index].op_ret == -1) {
op_ret = -1;
- op_errno = main_local->replies[index].op_errno;
+ op_errno = mlocal->replies[index].op_errno;
break;
}
- op_ret += main_local->replies[index].op_ret;
- final_count += main_local->replies[index].count;
- /* TODO: Do I need to send anything more in stbuf? */
- if (tmp_stbuf.st_size <
- main_local->replies[index].stbuf.st_size) {
- tmp_stbuf.st_size =
- main_local->replies[index].stbuf.st_size;
+ /* TODO: handle the 'holes' within the read range
+ properly */
+ if (mlocal->replies[index].op_ret <
+ mlocal->replies[index].requested_size) {
+ need_to_check_proper_size = 1;
}
+
+ op_ret += mlocal->replies[index].op_ret;
+ mlocal->count += mlocal->replies[index].count;
}
- if (op_ret != -1) {
- final_vec = CALLOC (final_count,
- sizeof (struct iovec));
- if (!final_vec) {
- op_ret = -1;
- final_count = 0;
- goto done;
- }
+ if (op_ret == -1)
+ goto done;
+ if (need_to_check_proper_size)
+ goto check_size;
- final_count = 0;
+ final_vec = GF_CALLOC (mlocal->count, sizeof (struct iovec),
+ gf_stripe_mt_iovec);
- for (index=0;
- index < main_local->wind_count; index++) {
- memcpy (final_vec + final_count,
- main_local->replies[index].vector,
- (main_local->replies[index].count *
- sizeof (struct iovec)));
- final_count +=
- main_local->replies[index].count;
+ if (!final_vec) {
+ op_ret = -1;
+ goto done;
+ }
- free (main_local->replies[index].vector);
- }
- } else {
- final_vec = NULL;
- final_count = 0;
+ for (index = 0; index < mlocal->wind_count; index++) {
+ memcpy ((final_vec + final_count),
+ mlocal->replies[index].vector,
+ (mlocal->replies[index].count *
+ sizeof (struct iovec)));
+ final_count += mlocal->replies[index].count;
+ GF_FREE (mlocal->replies[index].vector);
}
+ /* FIXME: notice that st_ino, and st_dev (gen) will be
+ * different than what inode will have. Make sure this doesn't
+ * cause any bugs at higher levels */
+ memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf,
+ sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+ tmp_stbuf.ia_blocks = local->stbuf_blocks;
+
done:
/* */
- FREE (main_local->replies);
- iobref = main_local->iobref;
- STACK_UNWIND (main_frame, op_ret, op_errno,
- final_vec, final_count, &tmp_stbuf, iobref);
+ GF_FREE (mlocal->replies);
+ tmp_iobref = mlocal->iobref;
+ /* work around for nfs truncated read. Bug 3774 */
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
+ STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
+
+ iobref_unref (tmp_iobref);
+ GF_FREE (final_vec);
+ }
+
+ goto out;
+
+check_size:
+ mlocal->call_count = fctx->stripe_count;
- iobref_unref (iobref);
- if (final_vec)
- FREE (final_vec);
+ for (index = 0; index < fctx->stripe_count; index++) {
+ STACK_WIND (mframe, stripe_readv_fstat_cbk,
+ (fctx->xl_array[index]),
+ (fctx->xl_array[index])->fops->fstat,
+ mlocal->fd, NULL);
}
- STACK_DESTROY (frame->root);
+out:
+ STRIPE_STACK_DESTROY (frame);
+end:
return 0;
}
-/**
- * stripe_readv -
- */
+
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
int32_t idx = 0;
int32_t index = 0;
int32_t num_stripe = 0;
@@ -2827,17 +3439,18 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
stripe_fd_ctx_t *fctx = NULL;
- trav = this->children;
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -2845,122 +3458,183 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ goto err;
+ }
+ /* The file is stripe across the child nodes. Send the read request
+ * to the child nodes appropriately after checking which region of
* the file is in which child node. Always '0-<stripe_size>' part of
* the file resides in the first child.
*/
rounded_start = floor (offset, stripe_size);
rounded_end = roof (offset+size, stripe_size);
- num_stripe = (rounded_end - rounded_start) / stripe_size;
-
- local = CALLOC (1, sizeof (stripe_local_t));
+ num_stripe = (rounded_end- rounded_start)/stripe_size;
+
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->wind_count = num_stripe;
frame->local = local;
-
+
/* This is where all the vectors should be copied. */
- local->replies = CALLOC (num_stripe, sizeof (struct readv_replies));
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
}
-
+
off_index = (offset / stripe_size) % fctx->stripe_count;
-
+ local->wind_count = num_stripe;
+ local->readv_size = size;
+ local->offset = offset;
+ local->fd = fd_ref (fd);
+ local->fctx = fctx;
+
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = CALLOC (1, sizeof (stripe_local_t));
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
}
-
+
frame_size = min (roof (frame_offset+1, stripe_size),
(offset + size)) - frame_offset;
-
+
rlocal->node_index = index - off_index;
rlocal->orig_frame = frame;
+ rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
-
+ fd, frame_size, dest_offset, flags, xdata);
+
frame_offset += frame_size;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
+
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_writev_cbk -
- */
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
+
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->stbuf = *stbuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
+out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
-
-/**
- * stripe_single_writev_cbk -
- */
-int32_t
-stripe_single_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
-}
-/**
- * stripe_writev -
- */
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- struct iovec *tmp_vec = vector;
- stripe_private_t *priv = NULL;
+ struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
int32_t idx = 0;
@@ -2971,10 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -2982,29 +3665,57 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be stripped across the child nodes */
for (idx = 0; idx< count; idx ++) {
- total_size += tmp_vec[idx].iov_len;
+ total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ total_chunks = 0;
while (1) {
- /* Send striped chunk of the vector to child
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
+ /* Send striped chunk of the vector to child
nodes appropriately. */
- trav = this->children;
-
- idx = (((offset + offset_offset) /
+ idx = (((offset + offset_offset) /
local->stripe_size) % fctx->stripe_count);
- fill_size = (local->stripe_size -
+ fill_size = (local->stripe_size -
((offset + offset_offset) % local->stripe_size));
if (fill_size > remaining_size)
fill_size = remaining_size;
@@ -3013,158 +3724,619 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
tmp_count = iov_subset (vector, count, offset_offset,
offset_offset + fill_size, NULL);
- tmp_vec = CALLOC (tmp_count, sizeof (struct iovec));
+ tmp_vec = GF_CALLOC (tmp_count, sizeof (struct iovec),
+ gf_stripe_mt_iovec);
if (!tmp_vec) {
op_errno = ENOMEM;
goto err;
}
tmp_count = iov_subset (vector, count, offset_offset,
offset_offset + fill_size, tmp_vec);
-
+
local->wind_count++;
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND(frame, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
- FREE (tmp_vec);
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
+ GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+int32_t
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
-/* Management operations */
-/**
- * stripe_stats_cbk - Add all the fields received from different clients.
- * Once all the clients return, send stats to above layer.
- *
- */
int32_t
-stripe_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct xlator_stats *stats)
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ callcnt = ++mlocal->call_count;
+
if (op_ret == 0) {
- if (local->op_ret == -2) {
- /* This is to make sure this is the
- first time */
- local->stats = *stats;
- local->op_ret = 0;
- } else {
- local->stats.nr_files += stats->nr_files;
- local->stats.free_disk += stats->free_disk;
- local->stats.disk_usage += stats->disk_usage;
- local->stats.nr_clients += stats->nr_clients;
- }
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stats);
- }
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
-/**
- * stripe_stats -
- */
int32_t
-stripe_stats (call_frame_t *frame, xlator_t *this, int32_t flags)
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
- priv = this->private;
- trav = this->children;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
- local->op_ret = -2; /* to be used as a flag in _cbk */
- local->call_count = priv->child_count;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
- while (trav) {
- STACK_WIND (frame, stripe_stats_cbk, trav->xlator,
- trav->xlator->mops->stats, flags);
- trav = trav->next;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
}
+
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_release (xlator_t *this, fd_t *fd)
{
+ return 0;
+}
+
+int
+stripe_forget (xlator_t *this, inode_t *inode)
+{
uint64_t tmp_fctx = 0;
stripe_fd_ctx_t *fctx = NULL;
- fd_ctx_del (fd, this, &tmp_fctx);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
if (!tmp_fctx) {
- goto out;
+ goto err;
}
-
+
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
if (!fctx->static_array)
- FREE (fctx->xl_array);
-
- FREE (fctx);
-
- out:
- return 0;
+ GF_FREE (fctx->xl_array);
+
+ GF_FREE (fctx);
+err:
+ return 0;
}
-/**
- * notify
- */
int32_t
notify (xlator_t *this, int32_t event, void *data, ...)
{
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
+
+ if (!this)
+ return 0;
priv = this->private;
if (!priv)
return 0;
- switch (event)
+ switch (event)
{
case GF_EVENT_CHILD_UP:
{
@@ -3173,24 +4345,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
- if (data == FIRST_CHILD (this)) {
+ if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- default_notify (this, event, data);
- }
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -3198,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
- if (data == FIRST_CHILD (this)) {
+ if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- default_notify (this, event, data);
- }
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -3221,82 +4396,751 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if it's last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = strdup (stripe_str);
- stripe_opt = CALLOC (1, sizeof (struct stripe_options));
- if (!stripe_opt)
- goto out;
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent faliure
+ * conditions, hence we propogate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
+
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+inline gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
+}
+
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, stripe_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, stripe_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
+ LOCK (&frame->lock);
+ {
+
+ local->call_count--;
+ if (!local->call_count)
+ done = 1;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (done) {
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
+
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
+ stripe_local_wipe (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
+ }
+
+ return 0;
+}
+
+int32_t
+stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ gf_dirent_t *local_entry = NULL;
+ gf_dirent_t *tmp_entry = NULL;
+ xlator_list_t *trav = NULL;
+ loc_t loc = {0, };
+ int32_t count = 0;
+ stripe_private_t *priv = NULL;
+ int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+ priv = this->private;
+
+ subvols = priv->child_count;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ goto unlock;
+ } else {
+ local->op_ret = op_ret;
+ list_splice_init (&orig_entries->list,
+ &local->entries.list);
+ local->wind_count = op_ret;
+ }
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (op_ret == -1)
+ goto out;
+
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
+ count = op_ret;
+ list_for_each_entry_safe (local_entry, tmp_entry,
+ (&local->entries.list), list) {
+
+ if (!local_entry)
+ break;
+ if (!IA_ISREG (local_entry->d_stat.ia_type)) {
+ LOCK (&frame->lock);
+ {
+ local->wind_count--;
+ count = local->wind_count;
+ }
+ UNLOCK (&frame->lock);
+ continue;
+ }
+
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
goto out;
- }
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
-
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
+ }
+
+ loc.inode = inode_ref (local_entry->inode);
+
+ uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
+ trav = this->children;
+ while (trav) {
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
+ trav = trav->next;
+ }
+ loc_wipe (&loc);
+ }
+out:
+ if (!count) {
+ /* all entries are directories */
+ frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, &local->entries, NULL);
+ gf_dirent_free (&local->entries);
+ stripe_local_wipe (local);
+ mem_put (local);
+ }
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+
+}
+int32_t
+stripe_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ if (priv->first_child_down) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->fd = fd_ref (fd);
+
+ local->wind_count = 0;
+
+ local->count = 0;
+ local->op_ret = -1;
+ INIT_LIST_HEAD(&local->entries);
+
+ if (!trav)
+ goto err;
+
+ STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+ struct stripe_options *prev = NULL;
+ struct stripe_options *trav = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+
+ trav = priv->pattern;
+ priv->pattern = NULL;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+
+
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+
+ stripe_private_t *priv = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
+
+ priv = this->private;
+
+
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ ret = clear_pattern_list (priv);
+ if (ret)
+ goto unlock;
+
+ data = dict_get (options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
} else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
ret = 0;
out:
return ret;
+
}
/**
- * init - This function is called when xlator-graph gets initialized.
+ * init - This function is called when xlator-graph gets initialized.
* The option given in volfiles are parsed here.
- * @this -
+ * @this -
*/
int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
int32_t count = 0;
int ret = -1;
+ if (!this)
+ goto out;
+
trav = this->children;
while (trav) {
count++;
@@ -3314,16 +5158,27 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_WARNING,
"dangling volume. check volfile ");
}
-
- priv = CALLOC (1, sizeof (stripe_private_t));
+
+ if (count == 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe configured with only one \"subvolumes\" option."
+ " please check the volume. exiting");
+ goto out;
+ }
+
+ priv = GF_CALLOC (1, sizeof (stripe_private_t),
+ gf_stripe_mt_stripe_private_t);
+
if (!priv)
goto out;
- priv->xl_array = CALLOC (count, sizeof (xlator_t *));
+ priv->xl_array = GF_CALLOC (count, sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
if (!priv->xl_array)
goto out;
- priv->state = CALLOC (count, sizeof (int8_t));
- if (!priv->xl_array)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -3343,128 +5198,617 @@ init (xlator_t *this)
goto out;
}
- priv->block_size = (128 * GF_UNIT_KB);
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No \"option block-size <x>\" given, defaulting "
- "to 128KB");
- } else {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
- }
-
- priv->xattr_supported = 1;
- data = dict_get (this->options, "use-xattr");
- if (data) {
- if (gf_string2boolean (data->data,
- &priv->xattr_supported) == -1) {
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
gf_log (this->name, GF_LOG_ERROR,
- "error setting hard check for extended "
- "attribute");
- //return -1;
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
}
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+ GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
/* notify related */
priv->nodes_down = priv->child_count;
+
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
this->private = priv;
ret = 0;
- out:
+out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- FREE (priv->xl_array);
- FREE (priv);
+ GF_FREE (priv->xl_array);
+ GF_FREE (priv);
}
}
return ret;
-}
+}
-/**
+/**
* fini - Free all the private variables
- * @this -
+ * @this -
*/
-void
+void
fini (xlator_t *this)
{
stripe_private_t *priv = NULL;
struct stripe_options *prev = NULL;
struct stripe_options *trav = NULL;
+ if (!this)
+ goto out;
+
priv = this->private;
if (priv) {
- if (priv->xl_array)
- FREE (priv->xl_array);
+ this->private = NULL;
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
prev = trav;
trav = trav->next;
- FREE (prev);
+ GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
- FREE (priv);
+ GF_FREE (priv);
}
+out:
return;
}
+int32_t
+stripe_getxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .symlink = stripe_symlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .chmod = stripe_chmod,
- .chown = stripe_chown,
- .truncate = stripe_truncate,
- .utimens = stripe_utimens,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .setxattr = stripe_setxattr,
- .getxattr = stripe_getxattr,
- .removexattr = stripe_removexattr,
- .access = stripe_access,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .readlink = stripe_readlink,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .fchmod = stripe_fchmod,
- .fchown = stripe_fchown,
- .lookup = stripe_lookup,
- .setdents = stripe_setdents,
- .mknod = stripe_mknod,
-};
+{
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+
+ char size_key[256] = {0,};
+ char index_key[256] = {0,};
+ char count_key[256] = {0,};
+ char coalesce_key[256] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ if (!xattr || (op_ret == -1))
+ goto out;
+
+ sprintf (size_key, "trusted.%s.stripe-size", this->name);
+ sprintf (count_key, "trusted.%s.stripe-count", this->name);
+ sprintf (index_key, "trusted.%s.stripe-index", this->name);
+ sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name);
+
+ dict_del (xattr, size_key);
+ dict_del (xattr, count_key);
+ dict_del (xattr, index_key);
+ dict_del (xattr, coalesce_key);
+
+out:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+
+}
+
+int
+stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (!xattr || (op_ret < 0))
+ goto out;
+
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ stripe_aggregate_xattr (local->xattr, xattr);
+ }
+
+out:
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int32_t ret = -1;
+ long cky = 0;
+ void *xattr_val = NULL;
+ void *xattr_serz = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+ dict_t *stripe_xattr = NULL;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
-struct xlator_mops mops = {
- .stats = stripe_stats,
+ if (local->xsel[0] == '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+ return ret;
+ }
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->wind_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->xattr_list)
+ local->xattr_list = (stripe_xattr_sort_t *)
+ GF_CALLOC (local->nallocs,
+ sizeof (stripe_xattr_sort_t),
+ gf_stripe_mt_xattr_sort_t);
+
+ if (local->xattr_list) {
+ xattr = local->xattr_list + (int32_t) cky;
+
+ ret = dict_get_ptr_and_len (dict, local->xsel,
+ &xattr_val,
+ &xattr->xattr_len);
+ if (xattr->xattr_len == 0)
+ goto out;
+
+ xattr->pos = cky;
+ xattr->xattr_value = gf_memdup (xattr_val,
+ xattr->xattr_len);
+
+ if (xattr->xattr_value != NULL)
+ local->xattr_total_len += xattr->xattr_len + 1;
+ }
+ }
+ out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->xattr_total_len)
+ goto unwind;
+
+ stripe_xattr = dict_new ();
+ if (!stripe_xattr)
+ goto unwind;
+
+ /* select filler based on ->xsel */
+ if (XATTR_IS_PATHINFO (local->xsel))
+ ret = stripe_fill_pathinfo_xattr (this, local,
+ (char **)&xattr_serz);
+ else if (XATTR_IS_LOCKINFO (local->xsel)) {
+ ret = stripe_fill_lockinfo_xattr (this, local,
+ &xattr_serz);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown xattr in xattr request");
+ goto unwind;
+ }
+
+ if (!ret) {
+ ret = dict_set_dynptr (stripe_xattr, local->xsel,
+ xattr_serz,
+ local->xattr_total_len);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Can't set %s key in dict",
+ local->xsel);
+ }
+
+ unwind:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+ stripe_xattr, NULL);
+
+ ret = stripe_free_xattr_str (local);
+
+ GF_FREE (local->xattr_list);
+
+ if (stripe_xattr)
+ dict_unref (stripe_xattr);
+ }
+
+ return ret;
+}
+
+int32_t
+stripe_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int i = 0;
+ xlator_t **sub_volumes;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+
+ if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count *
+ sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, stripe_getxattr_unwind,
+ sub_volumes, priv->child_count,
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ priv->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+ }
+
+ if (name && strncmp (name, GF_XATTR_QUOTA_SIZE_KEY,
+ strlen (GF_XATTR_QUOTA_SIZE_KEY)) == 0) {
+ local->wind_count = priv->child_count;
+
+ for (i = 0, trav=this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND (frame, stripe_getxattr_cbk,
+ trav->xlator, trav->xlator->fops->getxattr,
+ loc, name, xdata);
+ }
+
+ return 0;
+ }
+
+ if (name &&
+ ((strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = inode_ctx_get (loc->inode, this,
+ (uint64_t *) &local->fctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe size unavailable from fctx"
+ " relying on pathinfo could lead to"
+ " wrong results");
+ }
+
+ local->nallocs = local->wind_count = priv->child_count;
+ (void) strncpy (local->xsel, name, strlen (name));
+
+ /**
+ * for xattrs that need info from all childs, fill ->xsel
+ * as above and call the filler function in cbk based on
+ * it
+ */
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->getxattr,
+ loc, name, xdata);
+ }
+
+ return 0;
+ }
+
+ if (name &&(*priv->vol_uuid)) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+
+ if (!IA_FILE_OR_DIR (loc->inode->ia_type))
+ local->marker.call_count = 1;
+ else
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca (local->marker.call_count *
+ sizeof (xlator_t *));
+
+ for (i = 0, trav = this->children;
+ i < local->marker.call_count;
+ i++, trav = trav->next) {
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local,
+ stripe_getxattr_unwind,
+ sub_volumes,
+ local->marker.call_count,
+ MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
+ priv->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+ }
+ }
+
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+ return 0;
+
+err:
+ STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (!name) {
+ goto out;
+ }
+
+ if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))
+ || !strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)))
+ is_spl = _gf_true;
+out:
+ return is_spl;
+}
+
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t ret = -1, op_errno = 0;
+ int i = 0;
+ xlator_list_t *trav = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = -1;
+ frame->local = local;
+
+ strncpy (local->xsel, name, strlen (name));
+ local->nallocs = local->wind_count = priv->child_count;
+
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->fgetxattr,
+ fd, name, xdata);
+ }
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ return ret;
+}
+
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ if (stripe_is_special_xattr (name)) {
+ stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+
+out:
+ return 0;
+}
+
+
+
+int32_t
+stripe_priv_dump (xlator_t *this)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ struct stripe_options *options = NULL;
+
+ GF_VALIDATE_OR_GOTO ("stripe", this, out);
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ ret = TRY_LOCK (&priv->lock);
+ if (ret != 0)
+ goto out;
+
+ gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
+ gf_proc_dump_write("child_count","%d", priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf (key, "subvolumes[%d]", i);
+ gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type,
+ priv->xl_array[i]->name);
+ }
+
+ options = priv->pattern;
+ while (options != NULL) {
+ gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern);
+ gf_proc_dump_write ("options_block_size", "%ul", options->block_size);
+
+ options = options->next;
+ }
+
+ gf_proc_dump_write ("block_size", "%ul", priv->block_size);
+ gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down);
+ gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down);
+ gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported);
+
+ UNLOCK (&priv->lock);
+
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
};
struct xlator_cbks cbks = {
.release = stripe_release,
+ .forget = stripe_forget,
};
+struct xlator_dumpops dumpops = {
+ .priv = stripe_priv_dump,
+};
struct volume_options options[] = {
- { .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZE_LIST,
+ .default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
+ .description = "Size of the stripe unit that would be read "
+ "from or written to the striped servers."
},
- { .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"use-xattr"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 5ffbc3670..5673d18f3 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -32,12 +23,63 @@
#include "common-utils.h"
#include "compat.h"
#include "compat-errno.h"
+#include "stripe-mem-types.h"
+#include "libxlator.h"
#include <fnmatch.h>
#include <signal.h>
+#define STRIPE_PATHINFO_HEADER "STRIPE:"
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
+
+#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
+ stripe_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ stripe_local_wipe(__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_STACK_DESTROY(frame) do { \
+ stripe_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ stripe_local_wipe (__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_VALIDATE_FCTX(fctx, label) do { \
+ int idx = 0; \
+ if (!fctx) { \
+ op_errno = EINVAL; \
+ goto label; \
+ } \
+ for (idx = 0; idx < fctx->stripe_count; idx++) { \
+ if (!fctx->xl_array[idx]) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "fctx->xl_array[%d] is NULL", \
+ idx); \
+ op_errno = ESTALE; \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+typedef struct stripe_xattr_sort {
+ int pos;
+ int xattr_len;
+ char *xattr_value;
+} stripe_xattr_sort_t;
/**
- * struct stripe_options : This keeps the pattern and the block-size
+ * struct stripe_options : This keeps the pattern and the block-size
* information, which is used for striping on a file.
*/
struct stripe_options {
@@ -47,7 +89,7 @@ struct stripe_options {
};
/**
- * Private structure for stripe translator
+ * Private structure for stripe translator
*/
struct stripe_private {
struct stripe_options *pattern;
@@ -56,27 +98,31 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
+ char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
int32_t op_errno;
- struct stat stbuf; /* 'stbuf' is also a part of reply */
+ int32_t requested_size;
+ struct iatt stbuf; /* 'stbuf' is also a part of reply */
};
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
- xlator_t **xl_array;
+ xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -87,28 +133,45 @@ struct stripe_local; /* this itself is used inside the structure; */
struct stripe_local {
struct stripe_local *next;
- call_frame_t *orig_frame;
-
+ call_frame_t *orig_frame;
+
stripe_fd_ctx_t *fctx;
/* Used by _cbk functions */
- struct stat stbuf;
- struct readv_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
- struct xlator_stats stats;
+ struct iatt stbuf;
+ struct iatt pre_buf;
+ struct iatt post_buf;
+ struct iatt preparent;
+ struct iatt postparent;
+
+ off_t stbuf_size;
+ off_t prebuf_size;
+ off_t postbuf_size;
+ off_t preparent_size;
+ off_t postparent_size;
+
+ blkcnt_t stbuf_blocks;
+ blkcnt_t prebuf_blocks;
+ blkcnt_t postbuf_blocks;
+ blkcnt_t preparent_blocks;
+ blkcnt_t postparent_blocks;
+
+ struct stripe_replies *replies;
+ struct statvfs statvfs_buf;
+ dir_entry_t *entry;
int8_t revalidate;
int8_t failed;
int8_t unwind;
+ size_t readv_size;
int32_t entry_count;
int32_t node_index;
int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
+ int32_t wind_count; /* used instead of child_cound
in case of read and write */
int32_t op_ret;
- int32_t op_errno;
+ int32_t op_errno;
int32_t count;
int32_t flags;
char *name;
@@ -117,8 +180,17 @@ struct stripe_local {
loc_t loc;
loc_t loc2;
+ mode_t mode;
+ dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
+
+ stripe_xattr_sort_t *xattr_list;
+ int32_t xattr_total_len;
+ int32_t nallocs;
+ char xsel[256];
+
+ struct marker_str marker;
/* General usage */
off_t offset;
@@ -128,13 +200,89 @@ struct stripe_local {
int entry_self_heal_needed;
int8_t *list;
- struct flock lock;
+ struct gf_flock lock;
fd_t *fd;
void *value;
struct iobref *iobref;
+ gf_dirent_t entries;
+ gf_dirent_t *dirent;
+ dict_t *xattr;
+ uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Determine the stripe index of a particular frame based on the translator.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int32_t i, idx = -1;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (fctx->xl_array[i] == prev->this) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+ int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dst[i] = src[i];
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int index;
+
+ if (!IA_ISREG(buf->ia_type))
+ return;
+
+ if (!fctx || !fctx->stripe_coalesce)
+ return;
+
+ index = stripe_get_frame_index(fctx, prev);
+ buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+ fctx->stripe_count, index);
+}
+
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index b9e6f63e9..000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index 7c62495c2..000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1225 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- FREE (local->sh_struct->count_list);
-
- FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t));
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = CALLOC (1, sizeof (dir_entry_t));
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- calloc (priv->child_count,
- sizeof (off_t));
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- calloc (priv->child_count,
- sizeof (dir_entry_t *));
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- calloc (priv->child_count,
- sizeof (int));
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t));
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = CALLOC (1, sizeof (dir_entry_t));
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- calloc (priv->child_count,
- sizeof (off_t));
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- calloc (priv->child_count,
- sizeof (dir_entry_t *));
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- calloc (priv->child_count,
- sizeof (int));
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- calloc (1, sizeof (struct unify_self_heal_struct));
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- calloc (1, sizeof (struct unify_self_heal_struct));
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index 35c108963..000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4506 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'st_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!S_ISDIR (loc->inode->st_mode)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->st_ino = buf->st_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (S_ISDIR (buf->st_mode) ||
- !local->stbuf.st_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!S_ISDIR (buf->st_mode)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->st_ino)
- local->op_ret = -1;
-
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) ((s->st_mode & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.st_mode && local->stbuf.st_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (S_ISDIR (local->stbuf.st_mode) &&
- !S_ISDIR (buf->st_mode)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!S_ISDIR (local->stbuf.st_mode) &&
- S_ISDIR (buf->st_mode)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !S_ISDIR (buf->st_mode)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = CALLOC (1, 2 * (priv->child_count + 2));
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && S_ISDIR (buf->st_mode)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->st_ino = buf->st_ino;
- if (S_ISDIR (buf->st_mode) ||
- !(local->stbuf.st_blksize)) {
- local->stbuf = *buf;
- }
- } else if (!S_ISDIR (buf->st_mode)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->st_nlink < buf->st_nlink) {
- local->st_nlink = buf->st_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.st_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !S_ISDIR (local->stbuf.st_mode)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = CALLOC (1, 2 * (local->index + 1));
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (S_ISDIR(local->loc1.inode->st_mode)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.st_ino = local->st_ino;
- }
-
- local->stbuf.st_nlink = local->st_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- S_ISDIR(local->stbuf.st_mode))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && S_ISDIR (loc->inode->st_mode)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->st_mode &&
- !S_ISDIR (loc->inode->st_mode)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->st_mode) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->st_ino = loc->inode->ino;
- if (S_ISDIR (loc->inode->st_mode)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (S_ISDIR (buf->st_mode))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = CALLOC (1, ZR_PATH_MAX);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- FREE (tmp_str);
- }
-
- local->list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (S_ISLNK (loc->inode->st_mode)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->st_ino = buf->st_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.st_ino = local->st_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.st_ino = local->st_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->st_ino = buf->st_ino;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-/**
- * unify_chmod -
- */
-int32_t
-unify_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->chmod,
- loc, mode);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->chmod,
- loc,
- mode);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_chown -
- */
-int32_t
-unify_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->chown,
- loc, uid, gid);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->chown,
- loc, uid, gid);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->st_ino = buf->st_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (S_ISDIR (buf->st_mode) ||
- !local->stbuf.st_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!S_ISDIR (buf->st_mode)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->st_ino)
- local->stbuf.st_ino = local->st_ino;
- else
- local->op_ret = -1;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_buf_cbk,
- NS(this),
- NS(this)->fops->stat,
- loc);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send truncate to NS node */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->stat, loc);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_utimens -
- */
-int32_t
-unify_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->utimens,
- loc, tv);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->utimens,
- loc,
- tv);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->fstat,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_fchmod -
- */
-int32_t
-unify_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fchmod, fd, mode);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fchmod, fd, mode);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
-
- STACK_WIND (frame, unify_buf_cbk,
- NS(this), NS(this)->fops->fchmod, fd, mode);
- }
-
- return 0;
-}
-
-/**
- * unify_fchown -
- */
-int32_t
-unify_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fchown, fd, uid, gid);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fchown, fd, uid, gid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_buf_cbk,
- NS(this), NS(this)->fops->fchown,
- fd, uid, gid);
- }
-
- return 0;
-}
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (S_ISDIR (loc->inode->st_mode)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->st_ino = buf->st_ino;
-
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->st_ino = buf->st_ino;
-
- /* Start the mapping list */
-
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!S_ISDIR (buf->st_mode))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.st_ino = local->st_ino;
- if (S_ISDIR (local->loc1.inode->st_mode)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = CALLOC (1, index * 2);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
- }
-
- local->stbuf = *buf;
- local->st_ino = buf->st_ino;
-
- /* Everything is fine. */
- if (S_ISDIR (buf->st_mode)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL);
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->st_ino = buf->st_ino;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->st_mode && (!S_ISDIR(inode->st_mode))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = CALLOC (1, sizeof (*_private));
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = CALLOC (1,
- sizeof (xlator_t) * (count + 1));
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- FREE (_private);
- return -1;
- }
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = CALLOC (1, sizeof (*xlparent));
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- /* Initialize the namespace volume */
- if (!ns_xl->ready) {
- ret = xlator_tree_init (ns_xl);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "initializing namespace node failed, "
- "Exiting");
- FREE (_private);
- return -1;
- }
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- FREE (priv->xl_array);
- FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .chmod = unify_chmod,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .chown = unify_chown,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .fchown = unify_fchown,
- .fchmod = unify_fchmod,
- .utimens = unify_utimens,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop
-};
-
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index 9841e5c7a..000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = CALLOC (1, sizeof (unify_local_t)); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct stat stbuf;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t st_ino;
- nlink_t st_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index 1bd7f332c..5075c59a8 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,13 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module -avoid-version
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = error-gen.h error-gen-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
new file mode 100644
index 000000000..f02280535
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ERROR_GEN_MEM_TYPES_H__
+#define __ERROR_GEN_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_error_gen_mem_types_ {
+ gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
+ gf_error_gen_mt_end
+};
+#endif
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index a14d7beda..ec0874b35 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,1733 +1,2163 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "xlator.h"
+#include "error-gen.h"
+#include "statedump.h"
+
+sys_error_t error_no_list[] = {
+ [GF_FOP_LOOKUP] = { .error_no_count = 4,
+ .error_no = {ENOENT,ENOTDIR,
+ ENAMETOOLONG,EAGAIN}},
+ [GF_FOP_STAT] = { .error_no_count = 7,
+ .error_no = {EACCES,EBADF,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR}},
+ [GF_FOP_READLINK] = { .error_no_count = 8,
+ .error_no = {EACCES,EFAULT,EINVAL,EIO,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOTDIR}},
+ [GF_FOP_MKNOD] = { .error_no_count = 11,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EINVAL,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOSPC,
+ ENOTDIR,EPERM,EROFS}},
+ [GF_FOP_MKDIR] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOSPC,ENOTDIR,
+ EPERM,EROFS}},
+ [GF_FOP_UNLINK] = { .error_no_count = 10,
+ .error_no = {EACCES,EBUSY,EFAULT,EIO,
+ EISDIR,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOTDIR,
+ EPERM,EROFS}},
+ [GF_FOP_RMDIR] = { .error_no_count = 8,
+ .error_no = {EACCES,EBUSY,EFAULT,
+ ENOMEM,ENOTDIR,ENOTEMPTY,
+ EPERM,EROFS}},
+ [GF_FOP_SYMLINK] = { .error_no_count = 11,
+ .error_no = {EACCES,EEXIST,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOSPC,ENOTDIR,EPERM,
+ EROFS}},
+ [GF_FOP_RENAME] = { .error_no_count = 13,
+ .error_no = {EACCES,EBUSY,EFAULT,
+ EINVAL,EISDIR,EMLINK,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOSPC,ENOTDIR,EEXIST,
+ EXDEV}},
+ [GF_FOP_LINK] = { .error_no_count = 13,
+ .error_no = {EACCES,EFAULT,EEXIST,EIO,
+ EMLINK,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOSPC,
+ ENOTDIR,EPERM,EROFS,
+ EXDEV}},
+ [GF_FOP_TRUNCATE] = { .error_no_count = 10,
+ .error_no = {EACCES,EFAULT,EFBIG,
+ EINTR,EINVAL,EIO,EISDIR,
+ ENAMETOOLONG,ENOENT,
+ EISDIR}},
+ [GF_FOP_CREATE] = {.error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,ENAMETOOLONG,
+ ENFILE,ENODEV,ENOENT,
+ ENODEV}},
+ [GF_FOP_OPEN] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV,ENOENT,ENOMEM}},
+ [GF_FOP_READ] = { .error_no_count = 5,
+ .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
+ ENAMETOOLONG}},
+ [GF_FOP_WRITE] = { .error_no_count = 7,
+ .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
+ [GF_FOP_STATFS] = {.error_no_count = 10,
+ .error_no = {EACCES,EBADF,EFAULT,EINTR,
+ EIO,ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOSYS,ENOTDIR}},
+ [GF_FOP_FLUSH] = { .error_no_count = 5,
+ .error_no = {EACCES,EFAULT,
+ ENAMETOOLONG,ENOSYS,
+ ENOENT}},
+ [GF_FOP_FSYNC] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_SETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_OPENDIR] = { .error_no_count = 8,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV}},
+ [GF_FOP_READDIR] = { .error_no_count = 5,
+ .error_no = {EINVAL,EACCES,EBADF,
+ EMFILE,ENOENT}},
+ [GF_FOP_READDIRP] = { .error_no_count = 5,
+ .error_no = {EINVAL,EACCES,EBADF,
+ EMFILE,ENOENT}},
+ [GF_FOP_FSYNCDIR] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_ACCESS] = { .error_no_count = 8,
+ .error_no = {EACCES,ENAMETOOLONG,
+ ENOENT,ENOTDIR,EROFS,
+ EFAULT,EINVAL,EIO}},
+ [GF_FOP_FTRUNCATE] = { .error_no_count = 9,
+ .error_no = {EACCES,EFAULT,EFBIG,
+ EINTR,EINVAL,EIO,EISDIR,
+ ENAMETOOLONG,ENOENT}},
+ [GF_FOP_FSTAT] = { .error_no_count = 7,
+ .error_no = {EACCES,EBADF,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR}},
+ [GF_FOP_LK] = { .error_no_count = 4,
+ .error_no = {EACCES,EFAULT,ENOENT,
+ EINTR}},
+ [GF_FOP_XATTROP] = { .error_no_count = 5,
+ .error_no = {EACCES,EFAULT,
+ ENAMETOOLONG,ENOSYS,
+ ENOENT}},
+ [GF_FOP_FXATTROP] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_INODELK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FINODELK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_ENTRYLK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,
+ ENAMETOOLONG,EINTR}},
+ [GF_FOP_FENTRYLK] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV,ENOENT,ENOMEM}},
+ [GF_FOP_SETATTR] = {.error_no_count = 11,
+ .error_no = {EACCES,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR,EPERM,
+ EROFS,EBADF,EIO}},
+ [GF_FOP_FSETATTR] = { .error_no_count = 11,
+ .error_no = {EACCES,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR,EPERM,
+ EROFS,EBADF,EIO}},
+ [GF_FOP_GETSPEC] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}}
+};
-typedef struct {
- int op_count;
-} eg_t;
-
-int error_gen (xlator_t *this)
-{
- eg_t *egp = NULL;
- int count = 0;
- egp = this->private;
- count = ++egp->op_count;
- if((count % 10) == 0) {
- count = count / 10;
- if ((count % 2) == 0)
- return ENOTCONN;
- else
- return EIO;
- }
- return 0;
+int
+generate_rand_no (int op_no)
+{
+ int rand_no = 0;
+
+ if (op_no < GF_FOP_MAXVALUE)
+ rand_no = rand () % error_no_list[op_no].error_no_count;
+ return rand_no;
}
-static int32_t
-error_gen_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict);
- return 0;
+int
+conv_errno_to_int (char **error_no)
+{
+ if (!strcmp ((*error_no), "ENOENT"))
+ return ENOENT;
+ else if (!strcmp ((*error_no), "ENOTDIR"))
+ return ENOTDIR;
+ else if (!strcmp ((*error_no), "ENAMETOOLONG"))
+ return ENAMETOOLONG;
+ else if (!strcmp ((*error_no), "EACCES"))
+ return EACCES;
+ else if (!strcmp ((*error_no), "EBADF"))
+ return EBADF;
+ else if (!strcmp ((*error_no), "EFAULT"))
+ return EFAULT;
+ else if (!strcmp ((*error_no), "ENOMEM"))
+ return ENOMEM;
+ else if (!strcmp ((*error_no), "EINVAL"))
+ return EINVAL;
+ else if (!strcmp ((*error_no), "EIO"))
+ return EIO;
+ else if (!strcmp ((*error_no), "EEXIST"))
+ return EEXIST;
+ else if (!strcmp ((*error_no), "ENOSPC"))
+ return ENOSPC;
+ else if (!strcmp ((*error_no), "EPERM"))
+ return EPERM;
+ else if (!strcmp ((*error_no), "EROFS"))
+ return EROFS;
+ else if (!strcmp ((*error_no), "EBUSY"))
+ return EBUSY;
+ else if (!strcmp ((*error_no), "EISDIR"))
+ return EISDIR;
+ else if (!strcmp ((*error_no), "ENOTEMPTY"))
+ return ENOTEMPTY;
+ else if (!strcmp ((*error_no), "EMLINK"))
+ return EMLINK;
+ else if (!strcmp ((*error_no), "ENODEV"))
+ return ENODEV;
+ else if (!strcmp ((*error_no), "EXDEV"))
+ return EXDEV;
+ else if (!strcmp ((*error_no), "EMFILE"))
+ return EMFILE;
+ else if (!strcmp ((*error_no), "ENFILE"))
+ return ENFILE;
+ else if (!strcmp ((*error_no), "ENOSYS"))
+ return ENOSYS;
+ else if (!strcmp ((*error_no), "EINTR"))
+ return EINTR;
+ else if (!strcmp ((*error_no), "EFBIG"))
+ return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
+ else
+ return EAGAIN;
}
-int32_t
-error_gen_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+int
+get_fop_int (char **op_no_str)
+{
+ if (!strcmp ((*op_no_str), "lookup"))
+ return GF_FOP_LOOKUP;
+ else if (!strcmp ((*op_no_str), "stat"))
+ return GF_FOP_STAT;
+ else if (!strcmp ((*op_no_str), "readlink"))
+ return GF_FOP_READLINK;
+ else if (!strcmp ((*op_no_str), "mknod"))
+ return GF_FOP_MKNOD;
+ else if (!strcmp ((*op_no_str), "mkdir"))
+ return GF_FOP_MKDIR;
+ else if (!strcmp ((*op_no_str), "unlink"))
+ return GF_FOP_UNLINK;
+ else if (!strcmp ((*op_no_str), "rmdir"))
+ return GF_FOP_RMDIR;
+ else if (!strcmp ((*op_no_str), "symlink"))
+ return GF_FOP_SYMLINK;
+ else if (!strcmp ((*op_no_str), "rename"))
+ return GF_FOP_RENAME;
+ else if (!strcmp ((*op_no_str), "link"))
+ return GF_FOP_LINK;
+ else if (!strcmp ((*op_no_str), "truncate"))
+ return GF_FOP_TRUNCATE;
+ else if (!strcmp ((*op_no_str), "create"))
+ return GF_FOP_CREATE;
+ else if (!strcmp ((*op_no_str), "open"))
+ return GF_FOP_OPEN;
+ else if (!strcmp ((*op_no_str), "readv"))
+ return GF_FOP_READ;
+ else if (!strcmp ((*op_no_str), "writev"))
+ return GF_FOP_WRITE;
+ else if (!strcmp ((*op_no_str), "statfs"))
+ return GF_FOP_STATFS;
+ else if (!strcmp ((*op_no_str), "flush"))
+ return GF_FOP_FLUSH;
+ else if (!strcmp ((*op_no_str), "fsync"))
+ return GF_FOP_FSYNC;
+ else if (!strcmp ((*op_no_str), "setxattr"))
+ return GF_FOP_SETXATTR;
+ else if (!strcmp ((*op_no_str), "getxattr"))
+ return GF_FOP_GETXATTR;
+ else if (!strcmp ((*op_no_str), "removexattr"))
+ return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "opendir"))
+ return GF_FOP_OPENDIR;
+ else if (!strcmp ((*op_no_str), "readdir"))
+ return GF_FOP_READDIR;
+ else if (!strcmp ((*op_no_str), "readdirp"))
+ return GF_FOP_READDIRP;
+ else if (!strcmp ((*op_no_str), "fsyncdir"))
+ return GF_FOP_FSYNCDIR;
+ else if (!strcmp ((*op_no_str), "access"))
+ return GF_FOP_ACCESS;
+ else if (!strcmp ((*op_no_str), "ftruncate"))
+ return GF_FOP_FTRUNCATE;
+ else if (!strcmp ((*op_no_str), "fstat"))
+ return GF_FOP_FSTAT;
+ else if (!strcmp ((*op_no_str), "lk"))
+ return GF_FOP_LK;
+ else if (!strcmp ((*op_no_str), "xattrop"))
+ return GF_FOP_XATTROP;
+ else if (!strcmp ((*op_no_str), "fxattrop"))
+ return GF_FOP_FXATTROP;
+ else if (!strcmp ((*op_no_str), "inodelk"))
+ return GF_FOP_INODELK;
+ else if (!strcmp ((*op_no_str), "finodelk"))
+ return GF_FOP_FINODELK;
+ else if (!strcmp ((*op_no_str), "etrylk"))
+ return GF_FOP_ENTRYLK;
+ else if (!strcmp ((*op_no_str), "fentrylk"))
+ return GF_FOP_FENTRYLK;
+ else if (!strcmp ((*op_no_str), "setattr"))
+ return GF_FOP_SETATTR;
+ else if (!strcmp ((*op_no_str), "fsetattr"))
+ return GF_FOP_FSETATTR;
+ else if (!strcmp ((*op_no_str), "getspec"))
+ return GF_FOP_GETSPEC;
+ else
+ return -1;
}
+int
+error_gen (xlator_t *this, int op_no)
+{
+ eg_t *egp = NULL;
+ int count = 0;
+ int failure_iter_no = GF_FAILURE_DEFAULT;
+ char *error_no = NULL;
+ int rand_no = 0;
+ int ret = 0;
+
+ egp = this->private;
+
+ LOCK (&egp->lock);
+ {
+ count = ++egp->op_count;
+ failure_iter_no = egp->failure_iter_no;
+ error_no = egp->error_no;
+ }
+ UNLOCK (&egp->lock);
+
+ if((count % failure_iter_no) == 0) {
+ LOCK (&egp->lock);
+ {
+ egp->op_count = 0;
+ }
+ UNLOCK (&egp->lock);
+
+ if (error_no)
+ ret = conv_errno_to_int (&error_no);
+ else {
+
+ rand_no = generate_rand_no (op_no);
+ if (op_no >= GF_FOP_MAXVALUE)
+ op_no = 0;
+ if (rand_no >= error_no_list[op_no].error_no_count)
+ rand_no = 0;
+ ret = error_no_list[op_no].error_no[rand_no];
+ }
+ if (egp->random_failure == _gf_true)
+ egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
+ }
+ return ret;
+}
-int32_t
-error_gen_forget (xlator_t *this,
- inode_t *inode)
+
+int
+error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
+ buf, xdata, postparent);
+ return 0;
}
-int32_t
-error_gen_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-error_gen_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+int
+error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LOOKUP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LOOKUP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_stat_cbk,
+
+ STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+int
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STAT];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_STAT);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_chmod_cbk,
+
+ STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
- return 0;
+ FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+int
+error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
-int32_t
-error_gen_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+
+int
+error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_fchmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
-int32_t
-error_gen_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SETATTR);
-int32_t
-error_gen_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_chown_cbk,
+
+ STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
- return 0;
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
}
-int32_t
-error_gen_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-error_gen_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+int
+error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fchown_cbk,
+
+ STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
}
-int32_t
-error_gen_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+
+int
+error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_TRUNCATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_TRUNCATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_truncate_cbk,
+
+ STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
-int32_t
-error_gen_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
+
+int
+error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp =NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FTRUNCATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FTRUNCATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_ftruncate_cbk,
+
+ STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
-
-int32_t
-error_gen_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
-int32_t
-error_gen_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
+int
+error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-int32_t
-error_gen_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
+int
+error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ACCESS];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_ACCESS);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_access_cbk,
+
+ STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc,
- mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
-int32_t
-error_gen_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- path);
- return 0;
+int
+error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *sbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
+
+int
+error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_readlink_cbk,
+
+ STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
-int32_t
-error_gen_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+int
+error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKNOD];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_MKNOD);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_mknod_cbk,
+
+ STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+
+int
+error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+int
+error_gen_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_MKDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_mkdir_cbk,
+
+ STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_UNLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_UNLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_unlink_cbk,
+
+ STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
-int32_t
-error_gen_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RMDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_RMDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_rmdir_cbk,
+
+ STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+int
+error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
+
+int
+error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SYMLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SYMLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
- STACK_WIND (frame,
- error_gen_symlink_cbk,
+
+ STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+int
+error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
}
-int32_t
-error_gen_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+error_gen_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RENAME];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_RENAME);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_rename_cbk,
+
+ STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
-int32_t
-error_gen_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+int
+error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+error_gen_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_link_cbk,
+
+ STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
-int32_t
-error_gen_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
+int
+error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_CREATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_CREATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+
+int
+error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd)
+
+int
+error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPEN];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_OPEN);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_open_cbk,
+ STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- return 0;
+
+int
+error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
-int32_t
-error_gen_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READ];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READ);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_readv_cbk,
+ STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stbuf);
- return 0;
+int
+error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+
+int
+error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_WRITE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_WRITE);
+
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
-
- STACK_WIND (frame,
- error_gen_writev_cbk,
+ STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
-int32_t
-error_gen_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FLUSH];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FLUSH);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_flush_cbk,
- FIRST_CHILD(this),
+ STACK_WIND (frame, error_gen_flush_cbk,
+ FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
-int32_t
-error_gen_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+
+int
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNC];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSYNC);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fsync_cbk,
+ STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSTAT];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSTAT);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fstat_cbk,
+ STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
-int32_t
-error_gen_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+
+int
+error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
+
+int
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPENDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_OPENDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_opendir_cbk,
+ STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
-
-int32_t
-error_gen_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- entries,
- count);
- return 0;
+int
+error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNCDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSYNCDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_getdents_cbk,
+ STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STATFS];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_STATFS);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_setdents_cbk,
+ STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+
+int
+error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fsyncdir_cbk,
+ STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags);
- return 0;
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+int
+error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_GETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_statfs_cbk,
+ STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
}
-
-int32_t
-error_gen_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_setxattr_cbk,
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- return 0;
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_getxattr_cbk,
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
}
-int32_t
-error_gen_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_XATTROP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_XATTROP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_xattrop_cbk,
+ STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+
+int
+error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- error_gen_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+
+int
+error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FXATTROP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FXATTROP);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_REMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_REMOVEXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_removexattr_cbk,
+ STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
- return 0;
+ loc, name, xdata);
+ return 0;
}
-int32_t
-error_gen_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_lk_cbk,
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
- return 0;
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
-int32_t
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int
+error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LK);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
+ return 0;
}
-int32_t
+int
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_INODELK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_INODELK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_inodelk_cbk,
+ STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
-int32_t
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+int
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FINODELK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FINODELK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_finodelk_cbk,
+ STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
-int32_t
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+int
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+
+int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ENTRYLK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_ENTRYLK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
-int32_t
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+
+int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FENTRYLK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FENTRYLK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
/* Management operations */
-int32_t
-error_gen_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stats);
- return 0;
+
+int
+error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data)
+{
+ STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
+ return 0;
}
-int32_t
-error_gen_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
+int
+error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flags)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETSPEC];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_GETSPEC);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_stats_cbk,
+ STACK_WIND (frame, error_gen_getspec_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
+ FIRST_CHILD(this)->fops->getspec,
+ key, flags);
return 0;
}
-
-int32_t
-error_gen_getspec_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- spec_data);
+int
+error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
-int32_t
-error_gen_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flags)
+int
+error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
- STACK_WIND (frame,
- error_gen_getspec_cbk,
+ STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->getspec,
- key, flags);
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
return 0;
}
-int32_t
-error_gen_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- file_checksum,
- dir_checksum);
+int
+error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
-int32_t
-error_gen_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
+int
+error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIRP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READDIRP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
+ STACK_WIND (frame, error_gen_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
return 0;
}
-int32_t
-error_gen_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
+{
+ GF_ASSERT (pvt);
+
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
}
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
-int32_t
-error_gen_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
- STACK_WIND (frame,
- error_gen_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
- return 0;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
}
int32_t
-error_gen_closedir (xlator_t *this,
- fd_t *fd)
+error_gen_priv_dump (xlator_t *this)
{
- return 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
int32_t
-error_gen_close (xlator_t *this,
- fd_t *fd)
+mem_acct_init (xlator_t *this)
{
- return 0;
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_error_gen_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
}
int
init (xlator_t *this)
{
- eg_t *pvt = NULL;
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "error-gen not configured with one subvolume");
- return -1;
- }
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error-gen not configured with one subvolume");
+ ret = -1;
+ goto out;
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- pvt = CALLOC (1, sizeof (eg_t));
- this->private = pvt;
- return 0;
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
+
+ if (!pvt) {
+ ret = -1;
+ goto out;
+ }
+
+ LOCK_INIT (&pvt->lock);
+
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
+
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
+
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
+
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ this->private = pvt;
+
+ /* Give some seed value here */
+ srand (time(NULL));
+out:
+ if (ret)
+ GF_FREE (pvt);
+ return ret;
}
+
void
fini (xlator_t *this)
{
- gf_log (this->name, GF_LOG_DEBUG, "fini called");
- return;
+ eg_t *pvt = NULL;
+
+ if (!this)
+ return;
+ pvt = this->private;
+
+ if (pvt) {
+ LOCK_DESTROY (&pvt->lock);
+ GF_FREE (pvt);
+ gf_log (this->name, GF_LOG_DEBUG, "fini called");
+ }
+ return;
}
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+struct xlator_fops cbks;
struct xlator_fops fops = {
.lookup = error_gen_lookup,
@@ -1740,10 +2170,7 @@ struct xlator_fops fops = {
.symlink = error_gen_symlink,
.rename = error_gen_rename,
.link = error_gen_link,
- .chmod = error_gen_chmod,
- .chown = error_gen_chown,
.truncate = error_gen_truncate,
- .utimens = error_gen_utimens,
.create = error_gen_create,
.open = error_gen_open,
.readv = error_gen_readv,
@@ -1754,33 +2181,52 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
- .getdents = error_gen_getdents,
+ .readdirp = error_gen_readdirp,
.fsyncdir = error_gen_fsyncdir,
.access = error_gen_access,
.ftruncate = error_gen_ftruncate,
.fstat = error_gen_fstat,
.lk = error_gen_lk,
- .fchmod = error_gen_fchmod,
- .fchown = error_gen_fchown,
- .setdents = error_gen_setdents,
.lookup_cbk = error_gen_lookup_cbk,
- .checksum = error_gen_checksum,
.xattrop = error_gen_xattrop,
.fxattrop = error_gen_fxattrop,
.inodelk = error_gen_inodelk,
.finodelk = error_gen_finodelk,
.entrylk = error_gen_entrylk,
- .fentrylk = error_gen_fentrylk
-};
-
-struct xlator_mops mops = {
- .stats = error_gen_stats,
- .getspec = error_gen_getspec,
+ .fentrylk = error_gen_fentrylk,
+ .setattr = error_gen_setattr,
+ .fsetattr = error_gen_fsetattr,
+ .getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
+struct volume_options options[] = {
+ { .key = {"failure"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
+ { .key = {"error-no"},
+ .value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
+ "EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
+ "EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
+ "ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
+ { .key = {"enable"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
new file mode 100644
index 000000000..d92c23062
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _ERROR_GEN_H
+#define _ERROR_GEN_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "error-gen-mem-types.h"
+
+#define GF_FAILURE_DEFAULT 10
+
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
+};
+
+typedef struct {
+ int enable[GF_FOP_MAXVALUE];
+ int op_count;
+ int failure_iter_no;
+ char *error_no;
+ gf_boolean_t random_failure;
+ gf_lock_t lock;
+} eg_t;
+
+typedef struct {
+ int error_no_count;
+ int error_no[20];
+} sys_error_t;
+
+#endif
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index 91aea6ebb..332d79015 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,12 +2,15 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module -avoid-version
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = io-stats-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
new file mode 100644
index 000000000..c30dfb17e
--- /dev/null
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __IO_STATS_MEM_TYPES_H__
+#define __IO_STATS_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_io_stats_mem_types_ {
+ gf_io_stats_mt_ios_conf = gf_common_mt_end + 1,
+ gf_io_stats_mt_ios_fd,
+ gf_io_stats_mt_ios_stat,
+ gf_io_stats_mt_ios_stat_list,
+ gf_io_stats_mt_end
+};
+#endif
+
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 2a46f4d78..7fb697ae4 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,1458 +1,2855 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
+#include "xlator.h"
#endif
/**
* xlators/debug/io_stats :
- * This translator maintains a counter for each fop (that is,
- * a counter which stores the number of invocations for the certain
- * fop), and makes these counters accessible as extended attributes.
+ * This translator maintains statistics of all filesystem activity
+ * happening through it. The kind of statistics include:
+ *
+ * a) total read data - since process start, last interval and per fd
+ * b) total write data - since process start, last interval and per fd
+ * c) counts of read IO block size - since process start, last interval and per fd
+ * d) counts of write IO block size - since process start, last interval and per fd
+ * e) counts of all FOP types passing through it
+ *
+ * Usage: setfattr -n io-stats-dump /tmp/filename /mnt/gluster
+ *
*/
+#include <fnmatch.h>
#include <errno.h>
#include "glusterfs.h"
#include "xlator.h"
+#include "io-stats-mem-types.h"
+#include <stdarg.h>
+#include "defaults.h"
+#include "logging.h"
+
+#define MAX_LIST_MEMBERS 100
+
+typedef enum {
+ IOS_STATS_TYPE_NONE,
+ IOS_STATS_TYPE_OPEN,
+ IOS_STATS_TYPE_READ,
+ IOS_STATS_TYPE_WRITE,
+ IOS_STATS_TYPE_OPENDIR,
+ IOS_STATS_TYPE_READDIRP,
+ IOS_STATS_TYPE_READ_THROUGHPUT,
+ IOS_STATS_TYPE_WRITE_THROUGHPUT,
+ IOS_STATS_TYPE_MAX
+}ios_stats_type_t;
+
+typedef enum {
+ IOS_STATS_THRU_READ,
+ IOS_STATS_THRU_WRITE,
+ IOS_STATS_THRU_MAX,
+}ios_stats_thru_t;
+
+struct ios_stat_lat {
+ struct timeval time;
+ double throughput;
+};
-#define BUMP_HIT(op) \
-do { \
- LOCK (&((io_stats_private_t *)this->private)->lock); \
- { \
- ((io_stats_private_t *)this->private) \
- ->fop_records[GF_FOP_##op].hits++; \
- } \
- UNLOCK (&((io_stats_private_t *)this->private)->lock); \
- } while (0)
-
-struct io_stats_private {
- gf_lock_t lock;
- struct {
- char *name;
- int enabled;
- uint32_t hits;
- } fop_records[GF_FOP_MAXVALUE];
+struct ios_stat {
+ gf_lock_t lock;
+ uuid_t gfid;
+ char *filename;
+ uint64_t counters [IOS_STATS_TYPE_MAX];
+ struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX];
+ int refcnt;
};
-typedef struct io_stats_private io_stats_private_t;
-int32_t
-io_stats_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
+struct ios_stat_list {
+ struct list_head list;
+ struct ios_stat *iosstat;
+ double value;
+};
+
+struct ios_stat_head {
+ gf_lock_t lock;
+ double min_cnt;
+ uint64_t members;
+ struct ios_stat_list *iosstats;
+};
+
+struct ios_lat {
+ double min;
+ double max;
+ double avg;
+};
+
+struct ios_global_stats {
+ uint64_t data_written;
+ uint64_t data_read;
+ uint64_t block_count_write[32];
+ uint64_t block_count_read[32];
+ uint64_t fop_hits[GF_FOP_MAXVALUE];
+ struct timeval started_at;
+ struct ios_lat latency[GF_FOP_MAXVALUE];
+ uint64_t nr_opens;
+ uint64_t max_nr_opens;
+ struct timeval max_openfd_time;
+};
+
+
+struct ios_conf {
+ gf_lock_t lock;
+ struct ios_global_stats cumulative;
+ uint64_t increment;
+ struct ios_global_stats incremental;
+ gf_boolean_t dump_fd_stats;
+ gf_boolean_t count_fop_hits;
+ gf_boolean_t measure_latency;
+ struct ios_stat_head list[IOS_STATS_TYPE_MAX];
+ struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
+};
+
+
+struct ios_fd {
+ char *filename;
+ uint64_t data_written;
+ uint64_t data_read;
+ uint64_t block_count_write[32];
+ uint64_t block_count_read[32];
+ struct timeval opened_at;
+};
+
+typedef enum {
+ IOS_DUMP_TYPE_NONE = 0,
+ IOS_DUMP_TYPE_FILE = 1,
+ IOS_DUMP_TYPE_DICT = 2,
+ IOS_DUMP_TYPE_MAX = 3
+} ios_dump_type_t;
+
+struct ios_dump_args {
+ ios_dump_type_t type;
+ union {
+ FILE *logfp;
+ dict_t *dict;
+ } u;
+};
+
+typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*,
+ int , int , uint64_t ) ;
+
+struct ios_local {
+ struct timeval wind_at;
+ struct timeval unwind_at;
+};
+
+struct volume_options options[];
+
+inline static int
+is_fop_latency_started (call_frame_t *frame)
+{
+ GF_ASSERT (frame);
+ struct timeval epoch = {0,};
+ return memcmp (&frame->begin, &epoch, sizeof (epoch));
+}
+
+#define END_FOP_LATENCY(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->end, NULL); \
+ update_ios_latency (conf, frame, GF_FOP_##op); \
+ } \
+ } while (0)
+
+#define START_FOP_LATENCY(frame) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->begin, NULL); \
+ } else { \
+ memset (&frame->begin, 0, sizeof (frame->begin));\
+ } \
+ } while (0)
+
+
+#define BUMP_FOP(op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (!conf) \
+ break; \
+ conf->cumulative.fop_hits[GF_FOP_##op]++; \
+ conf->incremental.fop_hits[GF_FOP_##op]++; \
+ } while (0)
+
+#define UPDATE_PROFILE_STATS(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ if (!is_fop_latency_started (frame)) \
+ break; \
+ conf = this->private; \
+ LOCK (&conf->lock); \
+ { \
+ if (conf && conf->measure_latency && \
+ conf->count_fop_hits) { \
+ BUMP_FOP(op); \
+ gettimeofday (&frame->end, NULL); \
+ update_ios_latency (conf, frame, GF_FOP_##op);\
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+#define BUMP_READ(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ \
+ LOCK (&conf->lock); \
+ { \
+ conf->cumulative.data_read += len; \
+ conf->incremental.data_read += len; \
+ conf->cumulative.block_count_read[lb2]++; \
+ conf->incremental.block_count_read[lb2]++; \
+ \
+ if (iosfd) { \
+ iosfd->data_read += len; \
+ iosfd->block_count_read[lb2]++; \
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+
+#define BUMP_WRITE(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ \
+ LOCK (&conf->lock); \
+ { \
+ conf->cumulative.data_written += len; \
+ conf->incremental.data_written += len; \
+ conf->cumulative.block_count_write[lb2]++; \
+ conf->incremental.block_count_write[lb2]++; \
+ \
+ if (iosfd) { \
+ iosfd->data_written += len; \
+ iosfd->block_count_write[lb2]++; \
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+
+#define BUMP_STATS(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ uint64_t value = 0; \
+ \
+ conf = this->private; \
+ \
+ LOCK(&iosstat->lock); \
+ { \
+ iosstat->counters[type]++; \
+ value = iosstat->counters[type]; \
+ } \
+ UNLOCK (&iosstat->lock); \
+ ios_stat_add_to_list (&conf->list[type], \
+ value, iosstat); \
+ \
+ } while (0)
+
+
+#define BUMP_THROUGHPUT(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
+ int flag = 0; \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
+ <= throughput) { \
+ iosstat->thru_counters[type].throughput = \
+ throughput; \
+ gettimeofday (&iosstat-> \
+ thru_counters[type].time, NULL); \
+ flag = 1; \
+ } \
+ } \
+ UNLOCK (&iosstat->lock); \
+ if (flag) \
+ ios_stat_add_to_list (&conf->thru_list[type], \
+ throughput, iosstat); \
+ } while (0)
+
+int
+ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
+{
+ uint64_t iosfd64 = 0;
+ unsigned long iosfdlong = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &iosfd64);
+ iosfdlong = iosfd64;
+ if (ret != -1)
+ *iosfd = (void *) iosfdlong;
+
+ return ret;
+}
+
+
+
+int
+ios_fd_ctx_set (fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
+{
+ uint64_t iosfd64 = 0;
+ int ret = 0;
+
+ iosfd64 = (unsigned long) iosfd;
+ ret = fd_ctx_set (fd, this, iosfd64);
+
+ return ret;
+}
+
+int
+ios_stat_ref (struct ios_stat *iosstat)
+{
+ LOCK (&iosstat->lock);
+ {
+ iosstat->refcnt++;
+ }
+ UNLOCK (&iosstat->lock);
+
+ return iosstat->refcnt;
+}
+
+int
+ios_stat_unref (struct ios_stat *iosstat)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ int cleanup = 0;
+ LOCK (&iosstat->lock);
+ {
+ iosstat->refcnt--;
+ if (iosstat->refcnt == 0) {
+ if (iosstat->filename) {
+ GF_FREE (iosstat->filename);
+ iosstat->filename = NULL;
+ }
+ cleanup = 1;
+ }
+ }
+ UNLOCK (&iosstat->lock);
+
+ if (cleanup) {
+ GF_FREE (iosstat);
+ iosstat = NULL;
+ }
+
return 0;
}
-int32_t
-io_stats_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int
+ios_inode_ctx_set (inode_t *inode, xlator_t *this, struct ios_stat *iosstat)
+{
+ uint64_t iosstat64 = 0;
+ int ret = 0;
+
+ ios_stat_ref (iosstat);
+ iosstat64 = (unsigned long )iosstat;
+ ret = inode_ctx_put (inode, this, iosstat64);
+ return ret;
+}
+
+int
+ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
+{
+ uint64_t iosstat64 = 0;
+ unsigned long iosstatlong = 0;
+ int ret = 0;
+
+ ret = inode_ctx_get (inode, this, &iosstat64);
+ iosstatlong = iosstat64;
+ if (ret != -1)
+ *iosstat = (void *) iosstatlong;
+
+ return ret;
+
+}
+
+int
+ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
+ struct ios_stat *iosstat)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ struct ios_stat_list *new = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *t = NULL;
+ struct ios_stat_list *list_entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *last = NULL;
+ struct ios_stat *stat = NULL;
+ int cnt = 0;
+ int found = 0;
+ int reposition = 0;
+ double min_count = 0;
+
+ LOCK (&list_head->lock);
+ {
+
+ if (list_head->min_cnt == 0)
+ list_head->min_cnt = value;
+ if ((list_head->members == MAX_LIST_MEMBERS) &&
+ (list_head->min_cnt > value))
+ goto out;
+
+ list_for_each_entry_safe (entry, t,
+ &list_head->iosstats->list, list) {
+ cnt++;
+ if (cnt == list_head->members)
+ last = entry;
+
+ if (!uuid_compare (iosstat->gfid,
+ entry->iosstat->gfid)) {
+ list_entry = entry;
+ found = cnt;
+ entry->value = value;
+ if (!reposition) {
+ if (cnt == list_head->members)
+ list_head->min_cnt = value;
+ goto out;
+ }
+ break;
+ } else if (entry->value <= value && !reposition) {
+ reposition = cnt;
+ tmp = entry;
+ if (cnt == list_head->members - 1)
+ min_count = entry->value;
+ }
+ }
+ if (found) {
+ list_del (&list_entry->list);
+ list_add_tail (&list_entry->list, &tmp->list);
+ if (min_count)
+ list_head->min_cnt = min_count;
+ goto out;
+ } else if (list_head->members == MAX_LIST_MEMBERS && reposition) {
+ new = GF_CALLOC (1, sizeof (*new),
+ gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref (iosstat);
+ list_add_tail (&new->list, &tmp->list);
+ stat = last->iosstat;
+ last->iosstat = NULL;
+ ios_stat_unref (stat);
+ list_del (&last->list);
+ GF_FREE (last);
+ if (reposition == MAX_LIST_MEMBERS)
+ list_head->min_cnt = value;
+ else if (min_count) {
+ list_head->min_cnt = min_count;
+ }
+ } else if (list_head->members < MAX_LIST_MEMBERS) {
+ new = GF_CALLOC (1, sizeof (*new),
+ gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref (iosstat);
+ if (reposition) {
+ list_add_tail (&new->list, &tmp->list);
+ } else {
+ list_add_tail (&new->list, &entry->list);
+ }
+ list_head->members++;
+ if (list_head->min_cnt > value)
+ list_head->min_cnt = value;
+ }
+ }
+out:
+ UNLOCK (&list_head->lock);
return 0;
}
-int32_t
-io_stats_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+inline int
+ios_stats_cleanup (xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+ struct ios_stat *iosstat = NULL;
+ uint64_t iosstat64 = 0;
+
+ inode_ctx_del (inode, this, &iosstat64);
+ if (!iosstat64) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not get inode ctx");
+ return 0;
+ }
+ iosstat = (void *) (long)iosstat64;
+ if (iosstat) {
+ ios_stat_unref (iosstat);
+ }
return 0;
}
-int32_t
-io_stats_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *buf,
- struct iobref *iobref)
+#define ios_log(this, logfp, fmt ...) \
+ do { \
+ if (logfp) { \
+ fprintf (logfp, fmt); \
+ fprintf (logfp, "\n"); \
+ } \
+ gf_log (this->name, GF_LOG_INFO, fmt); \
+ } while (0)
+
+int
+ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, FILE* logfp)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf, iobref);
+ struct ios_stat_list *entry = NULL;
+
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+ ios_log (this, logfp, "%-12.0f %s",
+ entry->value, entry->iosstat->filename);
+ }
+ }
+ UNLOCK (&list_head->lock);
return 0;
}
-int32_t
-io_stats_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int
+ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
+ FILE* logfp, ios_stats_type_t type)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat_list *entry = NULL;
+ struct timeval time = {0, };
+ char timestr[256] = {0, };
+
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+ gf_time_fmt (timestr, sizeof timestr,
+ entry->iosstat->thru_counters[type].time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, time.tv_usec);
+
+ ios_log (this, logfp, "%s \t %-10.2f \t %s",
+ timestr, entry->value, entry->iosstat->filename);
+ }
+ }
+ UNLOCK (&list_head->lock);
+ return 0;
+}
+
+int
+io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, FILE* logfp)
+{
+ int i = 0;
+ int per_line = 0;
+ int index = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_conf *conf = NULL;
+ char timestr[256] = {0, };
+ char str_header[128] = {0};
+ char str_read[128] = {0};
+ char str_write[128] = {0};
+
+ conf = this->private;
+
+ if (interval == -1)
+ ios_log (this, logfp, "\n=== Cumulative stats ===");
+ else
+ ios_log (this, logfp, "\n=== Interval %d stats ===",
+ interval);
+ ios_log (this, logfp, " Duration : %"PRId64" secs",
+ (uint64_t) (now->tv_sec - stats->started_at.tv_sec));
+ ios_log (this, logfp, " BytesRead : %"PRId64,
+ stats->data_read);
+ ios_log (this, logfp, " BytesWritten : %"PRId64"\n",
+ stats->data_written);
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c", "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c", "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c", "Write Count", ':');
+ index = 14;
+ for (i = 0; i < 32; i++) {
+ if ((stats->block_count_read[i] == 0) &&
+ (stats->block_count_write[i] == 0))
+ continue;
+ per_line++;
+
+ snprintf (str_header+index, sizeof (str_header)-index,
+ "%16dB+", (1<<i));
+ if (stats->block_count_read[i])
+ snprintf (str_read+index, sizeof (str_read)-index,
+ "%18"PRId64, stats->block_count_read[i]);
+ else snprintf (str_read+index, sizeof (str_read)-index,
+ "%18s", "0");
+ if (stats->block_count_write[i])
+ snprintf (str_write+index, sizeof (str_write)-index,
+ "%18"PRId64, stats->block_count_write[i]);
+ else snprintf (str_write+index, sizeof (str_write)-index,
+ "%18s", "0");
+
+ index += 18;
+ if (per_line == 3) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+
+ memset (str_header, 0, sizeof (str_header));
+ memset (str_read, 0, sizeof (str_read));
+ memset (str_write, 0, sizeof (str_write));
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c",
+ "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c",
+ "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c",
+ "Write Count", ':');
+
+ index = 14;
+ per_line = 0;
+ }
+ }
+
+ if (per_line != 0) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+ }
+
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "Fop",
+ "Call Count", "Avg-Latency", "Min-Latency",
+ "Max-Latency");
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
+ "-----------", "-----------", "-----------");
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] && !stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11s "
+ "us %11s us %11s us", gf_fop_list[i],
+ stats->fop_hits[i], "0", "0", "0");
+ else if (stats->fop_hits[i] && stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11.2lf us "
+ "%11.2lf us %11.2lf us", gf_fop_list[i],
+ stats->fop_hits[i], stats->latency[i].avg,
+ stats->latency[i].min, stats->latency[i].max);
+ }
+ ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- "
+ " ----- ----- ----- -----\n");
+
+ if (interval == -1) {
+ LOCK (&conf->lock);
+ {
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+ ios_log (this, logfp, "Current open fd's: %"PRId64
+ " Max open fd's: %"PRId64" time %s",
+ conf->cumulative.nr_opens,
+ conf->cumulative.max_nr_opens, timestr);
+ }
+ UNLOCK (&conf->lock);
+ ios_log (this, logfp, "\n==========Open File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ ios_dump_file_stats (list_head, this, logfp);
+
+
+ ios_log (this, logfp, "\n==========Read File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Write File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Directory open stats========");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Directory readdirp Stats=======");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Read Throughput File Stats=====");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+
+ ios_log (this, logfp, "\n======Write Throughput File Stats======");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ }
return 0;
}
-int32_t
-io_stats_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
+int
+io_stats_dump_global_to_dict (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, dict_t *dict)
+{
+ int ret = 0;
+ char key[256] = {0};
+ uint64_t sec = 0;
+ int i = 0;
+ uint64_t count = 0;
+
+ GF_ASSERT (stats);
+ GF_ASSERT (now);
+ GF_ASSERT (dict);
+ GF_ASSERT (this);
+
+ if (interval == -1)
+ snprintf (key, sizeof (key), "cumulative");
+ else
+ snprintf (key, sizeof (key), "interval");
+ ret = dict_set_int32 (dict, key, interval);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "interval %d", interval);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-duration", interval);
+ sec = (uint64_t) (now->tv_sec - stats->started_at.tv_sec);
+ ret = dict_set_uint64 (dict, key, sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "duration(%d) - %"PRId64, interval, sec);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-read", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_read);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "read(%d) - %"PRId64, interval, stats->data_read);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-write", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_written);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "write(%d) - %"PRId64, interval, stats->data_written);
+ goto out;
+ }
+ for (i = 0; i < 32; i++) {
+ if (stats->block_count_read[i]) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-read-%d", interval,
+ (1 << i));
+ count = stats->block_count_read[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set read-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++) {
+ if (stats->block_count_write[i]) {
+ snprintf (key, sizeof (key), "%d-write-%d", interval,
+ (1<<i));
+ count = stats->block_count_write[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set write-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
+ }
+ }
+ }
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-hits", interval, i);
+ ret = dict_set_uint64 (dict, key, stats->fop_hits[i]);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set %s-fop-hits: %"PRIu64, gf_fop_list[i],
+ stats->fop_hits[i]);
+ goto out;
+ }
+
+ if (stats->latency[i].avg == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-avglatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].avg);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "avglatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].avg);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-minlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].min);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "minlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].min);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-maxlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].max);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "maxlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].max);
+ goto out;
+ }
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+io_stats_dump_global (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval,
+ struct ios_dump_args *args)
+{
+ int ret = -1;
+
+ GF_ASSERT (args);
+ GF_ASSERT (now);
+ GF_ASSERT (stats);
+ GF_ASSERT (this);
+
+
+
+ switch (args->type) {
+ case IOS_DUMP_TYPE_FILE:
+ ret = io_stats_dump_global_to_logfp (this, stats, now,
+ interval, args->u.logfp);
+ break;
+ case IOS_DUMP_TYPE_DICT:
+ ret = io_stats_dump_global_to_dict (this, stats, now,
+ interval, args->u.dict);
+ break;
+ default:
+ GF_ASSERT (0);
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+ios_dump_args_init (struct ios_dump_args *args, ios_dump_type_t type,
+ void *output)
+{
+ int ret = 0;
+
+ GF_ASSERT (args);
+ GF_ASSERT (type > IOS_DUMP_TYPE_NONE && type < IOS_DUMP_TYPE_MAX);
+ GF_ASSERT (output);
+
+ args->type = type;
+ switch (args->type) {
+ case IOS_DUMP_TYPE_FILE:
+ args->u.logfp = output;
+ break;
+ case IOS_DUMP_TYPE_DICT:
+ args->u.dict = output;
+ break;
+ default:
+ GF_ASSERT (0);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+io_stats_dump (xlator_t *this, struct ios_dump_args *args)
{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+ struct ios_conf *conf = NULL;
+ struct ios_global_stats cumulative = {0, };
+ struct ios_global_stats incremental = {0, };
+ int increment = 0;
+ struct timeval now;
+
+ GF_ASSERT (this);
+ GF_ASSERT (args);
+ GF_ASSERT (args->type > IOS_DUMP_TYPE_NONE);
+ GF_ASSERT (args->type < IOS_DUMP_TYPE_MAX);
+
+ conf = this->private;
+
+ gettimeofday (&now, NULL);
+ LOCK (&conf->lock);
+ {
+ cumulative = conf->cumulative;
+ incremental = conf->incremental;
+
+ increment = conf->increment++;
+
+ memset (&conf->incremental, 0, sizeof (conf->incremental));
+ conf->incremental.started_at = now;
+ }
+ UNLOCK (&conf->lock);
+
+ io_stats_dump_global (this, &cumulative, &now, -1, args);
+ io_stats_dump_global (this, &incremental, &now, increment, args);
+
return 0;
}
-int32_t
-io_stats_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
+
+int
+io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_conf *conf = NULL;
+ struct timeval now;
+ uint64_t sec = 0;
+ uint64_t usec = 0;
+ int i = 0;
+
+ conf = this->private;
+
+ if (!conf->dump_fd_stats)
+ return 0;
+
+ if (!iosfd)
+ return 0;
+
+ gettimeofday (&now, NULL);
+
+ if (iosfd->opened_at.tv_usec > now.tv_usec) {
+ now.tv_usec += 1000000;
+ now.tv_usec--;
+ }
+
+ sec = now.tv_sec - iosfd->opened_at.tv_sec;
+ usec = now.tv_usec - iosfd->opened_at.tv_usec;
+
+ gf_log (this->name, GF_LOG_INFO,
+ "--- fd stats ---");
+
+ if (iosfd->filename)
+ gf_log (this->name, GF_LOG_INFO,
+ " Filename : %s",
+ iosfd->filename);
+
+ if (sec)
+ gf_log (this->name, GF_LOG_INFO,
+ " Lifetime : %"PRId64"secs, %"PRId64"usecs",
+ sec, usec);
+
+ if (iosfd->data_read)
+ gf_log (this->name, GF_LOG_INFO,
+ " BytesRead : %"PRId64" bytes",
+ iosfd->data_read);
+
+ if (iosfd->data_written)
+ gf_log (this->name, GF_LOG_INFO,
+ " BytesWritten : %"PRId64" bytes",
+ iosfd->data_written);
+
+ for (i = 0; i < 32; i++) {
+ if (iosfd->block_count_read[i])
+ gf_log (this->name, GF_LOG_INFO,
+ " Read %06db+ : %"PRId64,
+ (1 << i), iosfd->block_count_read[i]);
+ }
+ for (i = 0; i < 32; i++) {
+ if (iosfd->block_count_write[i])
+ gf_log (this->name, GF_LOG_INFO,
+ "Write %06db+ : %"PRId64,
+ (1 << i), iosfd->block_count_write[i]);
+ }
return 0;
}
-int32_t
-io_stats_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+static void
+update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
+ glusterfs_fop_t op)
+{
+ double avg;
+
+ GF_ASSERT (stats);
+
+ if (!stats->latency[op].min)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].min > elapsed)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].max < elapsed)
+ stats->latency[op].max = elapsed;
+
+ avg = stats->latency[op].avg;
+
+ stats->latency[op].avg = avg + (elapsed - avg) / stats->fop_hits[op];
+}
+
+int
+update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
+ glusterfs_fop_t op)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ double elapsed;
+ struct timeval *begin, *end;
+
+ begin = &frame->begin;
+ end = &frame->end;
+
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6
+ + (end->tv_usec - begin->tv_usec);
+
+ update_ios_latency_stats (&conf->cumulative, elapsed, op);
+ update_ios_latency_stats (&conf->incremental, elapsed, op);
+
return 0;
}
int32_t
-io_stats_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
+ ios_stats_type_t flags, int32_t list_cnt)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_conf *conf = NULL;
+ int cnt = 0;
+ char key[256];
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ int ret = -1;
+ ios_stats_thru_t index = IOS_STATS_THRU_MAX;
+ char timestr[256] = {0, };
+ char *dict_timestr = NULL;
+
+ conf = this->private;
+
+ switch (flags) {
+ case IOS_STATS_TYPE_OPEN:
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ LOCK (&conf->lock);
+ {
+ ret = dict_set_uint64 (resp, "current-open",
+ conf->cumulative.nr_opens);
+ if (ret)
+ goto unlock;
+ ret = dict_set_uint64 (resp, "max-open",
+ conf->cumulative.max_nr_opens);
+
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ if (conf->cumulative.max_openfd_time.tv_sec)
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+
+ dict_timestr = gf_strdup (timestr);
+ if (!dict_timestr)
+ goto unlock;
+ ret = dict_set_dynstr (resp, "max-openfd-time",
+ dict_timestr);
+ if (ret)
+ goto unlock;
+ }
+ unlock:
+ UNLOCK (&conf->lock);
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
+ goto out;
+ break;
+ case IOS_STATS_TYPE_READ:
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ break;
+ case IOS_STATS_TYPE_WRITE:
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ break;
+ case IOS_STATS_TYPE_OPENDIR:
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ break;
+ case IOS_STATS_TYPE_READDIRP:
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ break;
+ case IOS_STATS_TYPE_READ_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ index = IOS_STATS_THRU_READ;
+ break;
+ case IOS_STATS_TYPE_WRITE_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ index = IOS_STATS_THRU_WRITE;
+ break;
+
+ default:
+ goto out;
+ }
+ ret = dict_set_int32 (resp, "top-op", flags);
+ if (!list_cnt)
+ goto out;
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+
+ cnt++;
+ snprintf (key, 256, "%s-%d", "filename", cnt);
+ ret = dict_set_str (resp, key, entry->iosstat->filename);
+ if (ret)
+ goto unlock_list_head;
+ snprintf (key, 256, "%s-%d", "value",cnt);
+ ret = dict_set_uint64 (resp, key, entry->value);
+ if (ret)
+ goto unlock_list_head;
+ if (index != IOS_STATS_THRU_MAX) {
+ snprintf (key, 256, "%s-%d", "time-sec", cnt);
+ ret = dict_set_int32 (resp, key,
+ entry->iosstat->thru_counters[index].time.tv_sec);
+ if (ret)
+ goto unlock_list_head;
+ snprintf (key, 256, "%s-%d", "time-usec", cnt);
+ ret = dict_set_int32 (resp, key,
+ entry->iosstat->thru_counters[index].time.tv_usec);
+ if (ret)
+ goto unlock_list_head;
+ }
+ if (cnt == list_cnt)
+ break;
+
+ }
+ }
+unlock_list_head:
+ UNLOCK (&list_head->lock);
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (resp, "members", cnt);
+ out:
+ return ret;
+}
+
+int
+io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ gettimeofday (&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set (fd, this, iosfd);
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (!iosstat) {
+ GF_FREE (path);
+ goto unwind;
+ }
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, buf->ia_gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+
+unwind:
+ UPDATE_PROFILE_STATS (frame, CREATE);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ gettimeofday (&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set (fd, this, iosfd);
+
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPEN);
+ iosstat = NULL;
+ }
+unwind:
+ UPDATE_PROFILE_STATS (frame, OPEN);
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
+
}
-int32_t
-io_stats_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, STAT);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ int len = 0;
+ fd_t *fd = NULL;
+ struct ios_stat *iosstat = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ if (op_ret > 0) {
+ len = iov_length (vector, count);
+ BUMP_READ (fd, len);
+ }
+
+ UPDATE_PROFILE_STATS (frame, READ);
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READ);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_READ);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
return 0;
+
}
-int32_t
-io_stats_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = NULL;
+
+ UPDATE_PROFILE_STATS (frame, WRITE);
+ if (frame->local){
+ inode = frame->local;
+ frame->local = NULL;
+ ios_inode_ctx_get (inode, this, &iosstat);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ inode = NULL;
+ iosstat = NULL;
+ }
+ }
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
+
}
-int32_t
-io_stats_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+
+
+int
+io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = frame->local;
+
+ frame->local = NULL;
+
+ UPDATE_PROFILE_STATS (frame, READDIRP);
+
+ ios_inode_ctx_get (inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *buf)
+
+int
+io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, READDIR);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
+
+int
+io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+ UPDATE_PROFILE_STATS (frame, FSYNC);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
-int32_t
-io_stats_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, SETATTR);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
-int32_t
-io_stats_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, UNLINK);
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
+
}
-int32_t
-io_stats_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+int
+io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, RENAME);
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-int32_t
-io_stats_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, READLINK);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
-int32_t
-io_stats_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, LOOKUP);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
return 0;
}
-int32_t
-io_stats_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int
+io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ UPDATE_PROFILE_STATS (frame, SYMLINK);
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, MKNOD);
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ char *path = frame->local;
+
+ UPDATE_PROFILE_STATS (frame, MKDIR);
+ if (op_ret < 0)
+ goto unwind;
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ LOCK_INIT (&iosstat->lock);
+ iosstat->filename = gf_strdup(path);
+ uuid_copy (iosstat->gfid, buf->ia_gfid);
+ ios_inode_ctx_set (inode, this, iosstat);
+ }
+
+unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, LINK);
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
+
+int
+io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FLUSH);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ struct ios_stat *iosstat = NULL;
+ int ret = -1;
+
+ UPDATE_PROFILE_STATS (frame, OPENDIR);
+ if (op_ret < 0)
+ goto unwind;
+
+ ios_fd_ctx_set (fd, this, 0);
+
+ ret = ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!ret)
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
+
+unwind:
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-int32_t
-io_stats_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- io_stats_private_t *priv = this->private;
- int i = 0;
- char keycont[] = "trusted.glusterfs.hits." /* 23 chars */
- "0123456789"
- "0123456789"
- "0123456789"
- "0123456789";
- int ret = -1;
-
- memset (keycont + 23, '\0', 40);
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (!(priv->fop_records[i].enabled &&
- priv->fop_records[i].name))
- continue;
+int
+io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
- strncpy (keycont + 23, priv->fop_records[i].name, 40);
- LOCK(&priv->lock);
- {
- ret = dict_set_uint32 (dict, keycont,
- priv->fop_records[i].hits);
- }
- UNLOCK(&priv->lock);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "dict set failed for xattr %s",
- keycont);
- break;
- }
- }
+ UPDATE_PROFILE_STATS (frame, RMDIR);
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, TRUNCATE);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
return 0;
}
-int32_t
-io_stats_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, STATFS);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, SETXATTR);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, GETXATTR);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-int32_t
-io_stats_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-int32_t
-io_stats_entrylk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+int
+io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, FSYNCDIR);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+
+int
+io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, ACCESS);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_inodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FTRUNCATE);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
return 0;
}
-int32_t
+int
+io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FSTAT);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, LK);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+
+int
+io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, ENTRYLK);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, XATTROP);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FXATTROP);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, INODELK);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- BUMP_HIT(ENTRYLK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_entrylk_cbk,
+ STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-int32_t
-io_stats_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+
+int
+io_stats_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_HIT(INODELK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_inodelk_cbk,
+ STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-int32_t
-io_stats_finodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FINODELK);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_finodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+
+int
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_HIT(FINODELK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_finodelk_cbk,
+ STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-int32_t
-io_stats_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+int
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_HIT(XATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
-int32_t
-io_stats_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+
+int
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_HIT(FXATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
-int32_t
-io_stats_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+
+int
+io_stats_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(LOOKUP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(STAT);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_stat_cbk,
+ STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
+
+int
+io_stats_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata)
{
- BUMP_HIT(READLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readlink_cbk,
+ STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
-
+ loc, size, xdata);
return 0;
}
-int32_t
-io_stats_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
+
+int
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(MKNOD);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_mknod_cbk,
+ STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc,
- mode,
- dev);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
-int32_t
-io_stats_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+
+int
+io_stats_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(MKDIR);
+ frame->local = gf_strdup (loc->path);
+
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_mkdir_cbk,
+ STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc,
- mode);
+ loc, mode, umask, xdata);
return 0;
}
-int32_t
-io_stats_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
{
- BUMP_HIT(UNLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_unlink_cbk,
+ STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-int32_t
-io_stats_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
{
- BUMP_HIT(RMDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_rmdir_cbk,
+ STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
-
+ loc, flags, xdata);
return 0;
}
-int32_t
-io_stats_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
+
+int
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(SYMLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_symlink_cbk,
+ STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath,
- loc);
-
+ linkpath, loc, umask, xdata);
return 0;
}
-int32_t
-io_stats_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+io_stats_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_HIT(RENAME);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_rename_cbk,
+ STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc,
- newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
-int32_t
-io_stats_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+io_stats_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_HIT(LINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_link_cbk,
+ STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc,
- newloc);
+ oldloc, newloc, xdata);
return 0;
}
-int32_t
-io_stats_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- BUMP_HIT(CHMOD);
-
- STACK_WIND (frame,
- io_stats_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
-
- return 0;
-}
-int32_t
-io_stats_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+int
+io_stats_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_HIT(CHOWN);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_chown_cbk,
+ STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
-
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-io_stats_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+
+int
+io_stats_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- BUMP_HIT(TRUNCATE);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_truncate_cbk,
+ STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
-
+ loc, offset, xdata);
return 0;
}
-int32_t
-io_stats_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- BUMP_HIT(UTIMENS);
-
- STACK_WIND (frame,
- io_stats_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- return 0;
-}
-
-int32_t
-io_stats_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
+int
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(OPEN);
+ frame->local = gf_strdup (loc->path);
- STACK_WIND (frame,
- io_stats_open_cbk,
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc,
- flags,
- fd);
+ loc, flags, fd, xdata);
return 0;
}
-int32_t
-io_stats_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
+
+int
+io_stats_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(CREATE);
+ frame->local = gf_strdup (loc->path);
- STACK_WIND (frame,
- io_stats_create_cbk,
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc,
- flags,
- mode,
- fd);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
-int32_t
-io_stats_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
+
+int
+io_stats_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- BUMP_HIT(READ);
+ frame->local = fd;
+
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readv_cbk,
+ STACK_WIND (frame, io_stats_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
+ fd, size, offset, flags, xdata);
return 0;
}
-int32_t
-io_stats_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
-{
- BUMP_HIT(WRITE);
-
- STACK_WIND (frame,
- io_stats_writev_cbk,
+
+int
+io_stats_writev (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ int len = 0;
+
+ if (fd->inode)
+ frame->local = fd->inode;
+ len = iov_length (vector, count);
+
+ BUMP_WRITE (fd, len);
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- offset,
- iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
+
}
-int32_t
-io_stats_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(STATFS);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_statfs_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
- loc);
+ STACK_WIND (frame, io_stats_statfs_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+io_stats_flush (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(FLUSH);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_flush_cbk,
+ STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
-int32_t
-io_stats_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+int
+io_stats_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
- BUMP_HIT(FSYNC);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fsync_cbk,
+ STACK_WIND (frame, io_stats_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
+ fd, flags, xdata);
return 0;
}
-int32_t
-io_stats_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
+
+int
+conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } *stub;
+ xlator_t *this = NULL;
+ char *filename = NULL;
+ FILE *logfp = NULL;
+ struct ios_dump_args args = {0};
+
+ stub = data;
+ this = stub->this;
+
+ filename = alloca (value->len + 1);
+ memset (filename, 0, value->len + 1);
+ memcpy (filename, data_to_str (value), value->len);
+
+ if (fnmatch ("*io*stat*dump", key, 0) == 0) {
+
+ if (!strncmp (filename, "", 1)) {
+ gf_log (this->name, GF_LOG_ERROR, "No filename given");
+ return -1;
+ }
+ logfp = fopen (filename, "w+");
+ if (!logfp) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
+ "for writing", filename);
+ return -1;
+ }
+ (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
+ logfp);
+ io_stats_dump (this, &args);
+ fclose (logfp);
+ }
+ return 0;
+}
+
+
+int
+io_stats_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- BUMP_HIT(SETXATTR);
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } stub;
- STACK_WIND (frame,
- io_stats_setxattr_cbk,
+ stub.this = this;
+ stub.inode = loc->inode;
+ stub.path = loc->path;
+
+ dict_foreach (dict, conditional_dump, &stub);
+
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_setxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
+ loc, dict, flags, xdata);
return 0;
}
-int32_t
-io_stats_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+io_stats_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_HIT(GETXATTR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_getxattr_cbk,
+ STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
+ loc, name, xdata);
return 0;
}
-int32_t
-io_stats_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+io_stats_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_HIT(REMOVEXATTR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_removexattr_cbk,
+ STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
return 0;
}
-int32_t
-io_stats_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- BUMP_HIT(OPENDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_opendir_cbk,
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+io_stats_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc,
- fd);
+ loc, fd, xdata);
return 0;
}
-int32_t
-io_stats_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
+int
+io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- BUMP_HIT(GETDENTS);
+ frame->local = fd->inode;
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_getdents_cbk,
+ STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
return 0;
}
-int32_t
-io_stats_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
+int
+io_stats_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
- BUMP_HIT(READDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readdir_cbk,
+ STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd,
- size,
- offset);
-
+ fd, size, offset, xdata);
return 0;
}
-int32_t
-io_stats_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync)
+int
+io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
- BUMP_HIT(FSYNCDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fsyncdir_cbk,
+ STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- datasync);
+ fd, datasync, xdata);
return 0;
}
-int32_t
-io_stats_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
+
+int
+io_stats_access (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
- BUMP_HIT(ACCESS);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_access_cbk,
+ STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc,
- mask);
+ loc, mask, xdata);
return 0;
}
-int32_t
-io_stats_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
+
+int
+io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- BUMP_HIT(FTRUNCATE);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_ftruncate_cbk,
+ STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
-
+ fd, offset, xdata);
return 0;
}
-int32_t
-io_stats_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+
+int
+io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_HIT(FCHOWN);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fchown_cbk,
+ STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
return 0;
}
-int32_t
-io_stats_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+
+int
+io_stats_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(FCHMOD);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fchmod_cbk,
+ STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
+ FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
return 0;
}
-int32_t
-io_stats_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- BUMP_HIT(FSTAT);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame,
- io_stats_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
}
-int32_t
-io_stats_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
+
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- BUMP_HIT(LK);
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
- STACK_WIND (frame,
- io_stats_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
return 0;
}
-int32_t
-io_stats_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
+
+int
+io_stats_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- BUMP_HIT(SETDENTS);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_setdents_cbk,
+ STACK_WIND (frame, io_stats_lk_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
return 0;
}
-int32_t
-io_stats_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
+int
+io_stats_release (xlator_t *this, fd_t *fd)
{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+ struct ios_fd *iosfd = NULL;
+ struct ios_conf *conf = NULL;
+
+ BUMP_FOP (RELEASE);
+
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens--;
+ }
+ UNLOCK (&conf->lock);
+
+ ios_fd_ctx_get (fd, this, &iosfd);
+ if (iosfd) {
+ io_stats_dump_fd (this, iosfd);
+
+ GF_FREE (iosfd->filename);
+ GF_FREE (iosfd);
+ }
return 0;
}
-int32_t
-io_stats_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
+
+int
+io_stats_releasedir (xlator_t *this, fd_t *fd)
{
- STACK_WIND (frame,
- io_stats_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
+ BUMP_FOP (RELEASEDIR);
return 0;
}
-int32_t
-io_stats_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
+int
+io_stats_forget (xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame, op_ret, op_errno, stats);
+ BUMP_FOP (FORGET);
+ ios_stats_cleanup (this, inode);
return 0;
}
-int32_t
-io_stats_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
+static int
+ios_init_top_stats (struct ios_conf *conf)
{
- STACK_WIND (frame,
- io_stats_stats_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
+ int i = 0;
+
+ GF_ASSERT (conf);
+
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->thru_list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
+ }
return 0;
}
-void
-enable_all_calls (io_stats_private_t *priv, int enabled)
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- priv->fop_records[i].enabled = enabled;
-}
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
-void
-enable_call (io_stats_private_t *priv, const char *name, int enabled)
-{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (!priv->fop_records[i].name)
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
continue;
- if (!strcasecmp(priv->fop_records[i].name, name))
- priv->fop_records[i].enabled = enabled;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
}
-}
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
-/*
- include = 1 for "include-ops"
- = 0 for "exclude-ops"
-*/
-void
-process_call_list (io_stats_private_t *priv, const char *list, int include)
+ UNLOCK (&conf->lock);
+
+ return;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
{
- enable_all_calls (priv, include ? 0 : 1);
+ struct ios_conf *conf = NULL;
+ int ret = -1;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("dump-fd-stats", conf->dump_fd_stats, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("count-fop-hits", conf->count_fop_hits, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("latency-measurement", conf->measure_latency,
+ options, bool, out);
- char *call = strsep ((char **)&list, ",");
- while (call) {
- enable_call (priv, call, include);
- call = strsep ((char **)&list, ",");
+ GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
}
+
+ GF_OPTION_RECONF ("log-level", log_str, options, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
}
int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_io_stats_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
init (xlator_t *this)
{
- dict_t *options = this->options;
- char *includes = NULL, *excludes = NULL;
- io_stats_private_t *priv = NULL;
+ struct ios_conf *conf = NULL;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int ret = -1;
if (!this)
return -1;
- if (!this->children || this->children->next) {
+ if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
- "io_stats translator requires one subvolume");
+ "io_stats translator requires atleast one subvolume");
return -1;
}
+
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
+ /* This is very much valid as io-stats currently is loaded
+ * on top of volumes on both client and server, hence this is
+ * not an warning message */
+ gf_log (this->name, GF_LOG_DEBUG,
"dangling volume. check volfile ");
}
- priv = CALLOC (1, sizeof(*priv));
- ERR_ABORT (priv);
+ conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf);
- includes = data_to_str (dict_get (options, "include-ops"));
- excludes = data_to_str (dict_get (options, "exclude-ops"));
-
- {
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- priv->fop_records[i].name = gf_fop_list[i];
- priv->fop_records[i].enabled = 1;
- }
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ return -1;
}
- if (includes && excludes) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
+ LOCK_INIT (&conf->lock);
+
+ gettimeofday (&conf->cumulative.started_at, NULL);
+ gettimeofday (&conf->incremental.started_at, NULL);
+
+ ret = ios_init_top_stats (conf);
+ if (ret)
return -1;
- }
- if (includes)
- process_call_list (priv, includes, 1);
- if (excludes)
- process_call_list (priv, excludes, 0);
- LOCK_INIT (&priv->lock);
+ GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
- /* Set this translator's inode table pointer to child node's pointer. */
- this->itable = FIRST_CHILD (this)->itable;
- this->private = priv;
+ GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out);
- return 0;
+ GF_OPTION_INIT ("latency-measurement", conf->measure_latency,
+ bool, out);
+
+ GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
+
+ GF_OPTION_INIT ("log-level", log_str, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
+
+ this->private = conf;
+ ret = 0;
+out:
+ return ret;
}
+
void
fini (xlator_t *this)
{
- io_stats_private_t *priv = NULL;
+ struct ios_conf *conf = NULL;
if (!this)
return;
- priv = this->private;
- if (priv) {
- LOCK_DESTROY (&priv->lock);
- FREE (priv);
- }
+ conf = this->private;
+
+ if (!conf)
+ return;
+ this->private = NULL;
- gf_log (this->name, GF_LOG_NORMAL,
+ ios_destroy_top_stats (conf);
+
+ GF_FREE(conf);
+
+ gf_log (this->name, GF_LOG_INFO,
"io-stats translator unloaded");
return;
}
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = 0;
+ struct ios_dump_args args = {0};
+ dict_t *output = NULL;
+ dict_t *dict = NULL;
+ int32_t top_op = 0;
+ int32_t list_cnt = 0;
+ double throughput = 0;
+ double time = 0;
+ va_list ap;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+ va_end (ap);
+ switch (event) {
+ case GF_EVENT_TRANSLATOR_INFO:
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", top_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "top-op", &top_op);
+ if (!ret) {
+ ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
+ if (top_op > IOS_STATS_TYPE_NONE &&
+ top_op < IOS_STATS_TYPE_MAX)
+ ret = io_stats_dump_stats_to_dict (this, output,
+ top_op, list_cnt);
+ if (top_op == IOS_STATS_TYPE_READ_THROUGHPUT ||
+ top_op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
+ ret = dict_get_double (dict, "throughput",
+ &throughput);
+ if (!ret) {
+ ret = dict_get_double (dict, "time",
+ &time);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output,
+ "throughput", throughput);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output, "time",
+ time);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+
+ }
+ } else {
+ (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_DICT,
+ output);
+ ret = io_stats_dump (this, &args);
+ }
+ break;
+ default:
+ default_notify (this, event, data);
+ break;
+
+ }
+out:
+ return ret;
+}
+
struct xlator_fops fops = {
.stat = io_stats_stat,
.readlink = io_stats_readlink,
@@ -1463,10 +2860,7 @@ struct xlator_fops fops = {
.symlink = io_stats_symlink,
.rename = io_stats_rename,
.link = io_stats_link,
- .chmod = io_stats_chmod,
- .chown = io_stats_chown,
.truncate = io_stats_truncate,
- .utimens = io_stats_utimens,
.open = io_stats_open,
.readv = io_stats_readv,
.writev = io_stats_writev,
@@ -1476,42 +2870,81 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
+ .readdirp = io_stats_readdirp,
.fsyncdir = io_stats_fsyncdir,
.access = io_stats_access,
.ftruncate = io_stats_ftruncate,
.fstat = io_stats_fstat,
.create = io_stats_create,
- .fchown = io_stats_fchown,
- .fchmod = io_stats_fchmod,
.lk = io_stats_lk,
.inodelk = io_stats_inodelk,
.finodelk = io_stats_finodelk,
.entrylk = io_stats_entrylk,
.lookup = io_stats_lookup,
- .setdents = io_stats_setdents,
- .getdents = io_stats_getdents,
- .checksum = io_stats_checksum,
.xattrop = io_stats_xattrop,
.fxattrop = io_stats_fxattrop,
-};
-
-struct xlator_mops mops = {
- .stats = io_stats_stats,
+ .setattr = io_stats_setattr,
+ .fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
+ .release = io_stats_release,
+ .releasedir = io_stats_releasedir,
+ .forget = io_stats_forget,
};
struct volume_options options[] = {
- { .key = {"include-ops", "include"},
+ { .key = {"dump-fd-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to file-operations would be "
+ "tracked inside GlusterFS data-structures."
+ },
+ { .key = { "latency-measurement" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to the latency of each operation "
+ "would be tracked inside GlusterFS data-structures. "
+ },
+ { .key = {"count-fop-hits"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
+ },
+
+ /* These are synthetic entries to assist validation of CLI's *
+ * volume set command */
+ { .key = {"client-log-level"},
.type = GF_OPTION_TYPE_STR,
- /*.value = { ""} */
+ .default_value = "INFO",
+ .description = "Changes the log-level of the clients",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
},
- { .key = {"exclude-ops", "exclude"},
- .type = GF_OPTION_TYPE_STR
- /*.value = { ""} */
+ { .key = {"sys-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "CRITICAL",
+ .description = "Gluster's syslog log-level",
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
+ },
+ { .key = {"brick-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "INFO",
+ .description = "Changes the log-level of the bricks",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
},
{ .key = {NULL} },
+
};
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a04..7b2597b4d 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module -avoid-version
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 000000000..9fa7d97c2
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 2c10c7515..c9d839356 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,2330 +1,3274 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
- * This translator logs all the arguments to the fops/mops and also
- * their _cbk functions, which later passes the call to next layer.
+ * This translator logs all the arguments to the fops/mops and also
+ * their _cbk functions, which later passes the call to next layer.
* Very helpful translator for debugging.
*/
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-
-#define ERR_EINVAL_NORETURN(cond) \
-do \
- { \
- if ((cond)) \
- { \
- gf_log ("ERROR", GF_LOG_ERROR, \
- "%s: %s: (%s) is true", \
- __FILE__, __FUNCTION__, #cond); \
- } \
- } while (0)
-
-typedef struct trace_private {
- int32_t debug_flag;
-} trace_private_t;
-
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
-
-int32_t
-trace_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, fd=%p, ino=%"PRIu64"), "
- "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", "
- "st_mode=%o, st_nlink=%"GF_PRI_NLINK", st_uid=%d, "
- "st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, fd, inode->ino, buf->st_dev,
- buf->st_ino, buf->st_mode, buf->st_nlink,
- buf->st_uid, buf->st_gid, buf->st_rdev, buf->st_size,
- buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
-}
-
-int32_t
-trace_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, *fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-trace_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_STAT].enabled) {
-
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
- ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *buf,
- struct iobref *iobref)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_READ].enabled) {
-
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", "
- "st_size=%"PRId64", st_blksize=%"GF_PRI_BLKSIZE", "
- "st_blocks=%"PRId64", st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf, iobref);
- return 0;
-}
-
-int32_t
-trace_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", "
- "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_size,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_GETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, count=%d)",
- frame->root->unique, op_ret, op_errno, count);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-int32_t
-trace_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64" :(op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-int32_t
-trace_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_CHOWN].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_CHMOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf {st_ino=%"PRIu64"})",
- frame->root->unique, op_ret, op_errno,
- (buf? buf->st_ino : 0));
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s)",
- frame->root->unique, op_ret, op_errno, buf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", st_mode=%o, "
- "st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
- "st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
- return 0;
-}
-
-int32_t
-trace_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_ino=%"PRIu64", "
- "st_mode=%o, st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
- "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, inode->ino, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_size, buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_dev=%"GF_PRI_DEV
- ", st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
- ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-
-int32_t
-trace_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, ino=%"PRIu64"",
- frame->root->unique, op_ret, op_errno,
- (inode? inode->ino : 0));
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*buf {st_nlink=%"GF_PRI_NLINK"})",
- frame->root->unique, op_ret, inode->ino, buf->st_nlink);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-trace_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", st_blksize=%"
- GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, buf->st_size, buf->st_blksize,
- buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_atime=%s, st_mtime=%s, "
- "st_ctime=%s})",
- frame->root->unique, op_ret, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !dict);
-
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, dict=%p)",
- frame->root->unique, op_ret, op_errno, dict);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-trace_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, buf->st_size, buf->st_blksize,
- buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-
-int32_t
-trace_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_entrylk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !dict);
-
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+int
+dump_history_trace (circular_buffer_t *cb, void *data)
+{
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
+
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
+
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
+
+ gf_proc_dump_write ("FOP", "%s\n", string);
+
+ return 0;
+}
+
+int
+trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
+
+ /* for 'release' log */
+ fd_ctx_set (fd, this, 0);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ /* for 'release' log */
+ if (op_ret >= 0)
+ fd_ctx_set (fd, this, 0);
+
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
+ return 0;
+}
+
+int
+trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
+
+int
+trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
+
+int
+trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preoldparent, preoldparentstr);
+ trace_stat_to_str (postoldparent, postoldparentstr);
+ trace_stat_to_str (prenewparent, prenewparentstr);
+ trace_stat_to_str (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int
+trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
+
+ /* For 'forget' */
+ inode_ctx_put (inode, this, 0);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int
+trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ /* for 'releasedir' log */
+ if (op_ret >= 0)
+ fd_ctx_set (fd, this, 0);
+
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int
+trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+int
+trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+int
+trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, prebufstr);
+ trace_stat_to_str (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {0.};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int
+trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int
+trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-trace_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+int
+trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !dict);
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ conf = this->private;
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-trace_inodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this );
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
+int
+trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
-int32_t
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+
+ return 0;
+}
+
+/* *_cbk section over <----------> fop section start */
+
+int
trace_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_entrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
+}
+
+int
+trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_inodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
+ return 0;
+}
+
+int
+trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ STACK_WIND (frame, trace_finodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->finodelk,
+ volume, fd, cmd, flock, xdata);
+ return 0;
+}
+
+int
+trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc || !basename);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ loc, flags, dict, xdata);
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc= {path=%s, ino=%"PRIu64"} basename=%s, cmd=%s, type=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
- }
+ return 0;
+}
+
+int
+trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame,
- trace_entrylk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+
+ return 0;
}
-int32_t
-trace_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+int
+trace_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ /* TODO: print all the keys mentioned in xattr_req */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+
+ return 0;
+}
+
+int
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_stat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+
+ return 0;
+}
+
+int
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink,
+ loc, size, xdata);
+
+ return 0;
+}
+
+int
+trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, dev, umask, xdata);
+
+ return 0;
+}
+
+int
+trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+int
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ STACK_WIND (frame, trace_unlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
+
+int
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rmdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+
+ return 0;
+}
+
+int
+trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_symlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+
+ return 0;
+}
+
+int
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+
+ return 0;
+}
+
+int
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_link_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+int
+trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ ia_time = stbuf->ia_atime;
+ strftime (actime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ ia_time = stbuf->ia_mtime;
+ strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+ frame->local = loc->inode->gfid;
+ }
+
+out:
+ STACK_WIND (frame, trace_setattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+
+ return 0;
+}
+
+int
+trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ ia_time = stbuf->ia_atime;
+ strftime (actime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ ia_time = stbuf->ia_mtime;
+ strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+ frame->local = fd->inode->gfid;
+ }
+
+out:
+ STACK_WIND (frame, trace_fsetattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+
+ return 0;
+}
+
+int
+trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+
+ return 0;
+}
+
+int
+trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+int
+trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int
+trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+int
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_statfs_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
+}
+
+int
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_flush_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
+}
+
+int
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
+
+int
+trace_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int
+trace_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+}
+
+int
+trace_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+
+ return 0;
+}
+
+int
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+}
+
+int
+trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+
+int
+trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc {path=%s, ino=%"PRIu64"}, cmd=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino,
- ((cmd == F_SETLK)? "F_SETLK" : "unknown"));
- }
+out:
+ STACK_WIND (frame, trace_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, offset, xdata);
- STACK_WIND (frame,
- trace_inodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
- return 0;
+ return 0;
}
+int
+trace_fsyncdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
-int32_t
-trace_finodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+out:
+ STACK_WIND (frame, trace_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd, datasync, xdata);
+ return 0;
+}
+
+int
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this );
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
+ frame->local = loc->inode->gfid;
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ LOG_ELEMENT (conf, string);
+ }
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+out:
+ STACK_WIND (frame, trace_access_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access,
+ loc, mask, xdata);
+ return 0;
}
int32_t
-trace_finodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !fd);
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (fd=%p, cmd=%s)",
- frame->root->unique, volume, fd,
- ((cmd == F_SETLK) ? "F_SETLK" : "unknown"));
- }
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rchecksum_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum,
+ fd, offset, len, xdata);
+
+ return 0;
- STACK_WIND (frame,
- trace_finodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
- return 0;
}
+int32_t
+trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fentrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+
+}
int32_t
-trace_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc);
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64" flags=%d)",
- frame->root->unique, loc->path, loc->inode->ino, flags);
-
- }
-
- STACK_WIND (frame, trace_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ conf = this->private;
- return 0;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
}
int32_t
-trace_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, flags=%d)",
- frame->root->unique, fd, flags);
-
- }
-
- STACK_WIND (frame, trace_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
- return 0;
-}
-
-int32_t
-trace_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- /* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- loc->inode->ino);
- }
-
- STACK_WIND (frame, trace_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
- return 0;
-}
-
-int32_t
-trace_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc );
-
-
- if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- ERR_EINVAL_NORETURN (!this || !loc || (size < 1));
-
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, size=%"GF_PRI_SIZET")",
- frame->root->unique, loc->path, loc->inode->ino, size);
- }
-
- STACK_WIND (frame,
- trace_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
-
- return 0;
-}
-
-int32_t
-trace_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
-{
- ERR_EINVAL_NORETURN (!this || !loc->path);
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%d, dev=%"GF_PRI_DEV")",
- frame->root->unique, loc->path, loc->inode->ino, mode, dev);
- }
-
- STACK_WIND (frame,
- trace_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc,
- mode,
- dev);
-
- return 0;
-}
-
-int32_t
-trace_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !loc->path);
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64", mode=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mode);
- }
-
- STACK_WIND (frame,
- trace_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-int32_t
-trace_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
-}
-
-int32_t
-trace_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !linkpath || !loc || !loc->path);
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (linkpath=%s, loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, linkpath, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
- }
-
- STACK_WIND (frame,
- trace_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc{path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->ino,
- newloc->path, newloc->ino);
- }
-
- STACK_WIND (frame,
- trace_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc,
- newloc);
-
- return 0;
-}
-
-int32_t
-trace_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->inode->ino,
- newloc->path, newloc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc,
- newloc);
- return 0;
-}
-
-int32_t
-trace_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_CHMOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%o)",
- frame->root->unique, loc->path, loc->inode->ino, mode);
- }
-
- STACK_WIND (frame,
- trace_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
-
- return 0;
-}
-
-int32_t
-trace_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_CHOWN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, uid=%d, gid=%d)",
- frame->root->unique, loc->path, loc->inode->ino, uid, gid);
- }
-
- STACK_WIND (frame,
- trace_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
-
- return 0;
-}
-
-int32_t
-trace_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, offset=%"PRId64")",
- frame->root->unique, loc->path, loc->inode->ino, offset);
- }
-
- STACK_WIND (frame,
- trace_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
-
- return 0;
-}
-
-int32_t
-trace_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- char actime_str[256];
- char modtime_str[256];
-
- ERR_EINVAL_NORETURN (!this || !loc || !tv);
-
- if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
- strftime (actime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[0].tv_sec));
- strftime (modtime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[1].tv_sec));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, "
- "*tv=%p {actime=%s, modtime=%s})",
- frame->root->unique, loc->path, loc->inode->ino,
- tv, actime_str, modtime_str);
- }
-
- STACK_WIND (frame,
- trace_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
-
- return 0;
-}
-
-int32_t
-trace_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=%d, fd=%p)",
- frame->root->unique, loc->path, loc->inode->ino, flags, fd);
- }
-
- STACK_WIND (frame,
- trace_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc,
- flags,
- fd);
- return 0;
-}
-
-int32_t
-trace_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc->path);
-
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=0%o mode=0%o)",
- frame->root->unique, loc->path, loc->inode->ino, flags, mode);
- }
-
- STACK_WIND (frame,
- trace_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc,
- flags,
- mode,
- fd);
- return 0;
-}
-
-int32_t
-trace_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd || (size < 1));
-
- if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
- }
-
- STACK_WIND (frame,
- trace_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
-}
-
-int32_t
-trace_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
-{
- ERR_EINVAL_NORETURN (!this || !fd || !vector || (count < 1));
-
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, *vector=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, fd, vector, count, offset);
- }
-
- STACK_WIND (frame,
- trace_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- offset,
- iobref);
- return 0;
-}
-
-int32_t
-trace_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
- }
-
- STACK_WIND (frame,
- trace_statfs_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
-}
-
-int32_t
-trace_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
- }
-
- STACK_WIND (frame,
- trace_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
-}
-
-
-int32_t
-trace_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (flags=%d, *fd=%p)",
- frame->root->unique, flags, fd);
- }
-
- STACK_WIND (frame,
- trace_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
-}
-
-int32_t
-trace_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !dict);
-
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, dict=%p, flags=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), dict, flags);
- }
-
- STACK_WIND (frame,
- trace_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
-}
-
-int32_t
-trace_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}), name=%s",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
- }
-
- STACK_WIND (frame,
- trace_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
-}
-
-int32_t
-trace_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !name);
-
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
- }
-
- STACK_WIND (frame,
- trace_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
-
- return 0;
-}
-
-int32_t
-trace_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc );
-
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64":( loc {path=%s, ino=%"PRIu64"}, fd=%p)",
- frame->root->unique, loc->path, loc->inode->ino, fd);
- }
-
- STACK_WIND (frame,
- trace_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc,
- fd);
- return 0;
-}
-
-int32_t
-trace_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_GETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64", flag=0x%x)",
- frame->root->unique, fd, size, offset, flag);
- }
-
- STACK_WIND (frame,
- trace_getdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
-}
-
-
-int32_t
-trace_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
- }
-
- STACK_WIND (frame,
- trace_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd,
- size,
- offset);
-
- return 0;
-}
-
-
-int32_t
-trace_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (datasync=%d, *fd=%p)",
- frame->root->unique, datasync, fd);
- }
-
- STACK_WIND (frame,
- trace_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- datasync);
- return 0;
-}
-
-int32_t
-trace_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*loc {path=%s, ino=%"PRIu64"}, mask=0%o)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mask);
- }
-
- STACK_WIND (frame,
- trace_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc,
- mask);
- return 0;
-}
-
-int32_t
-trace_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (offset=%"PRId64", *fd=%p)",
- frame->root->unique, offset, fd);
- }
-
- STACK_WIND (frame,
- trace_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
-
- return 0;
-}
-
-int32_t
-trace_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, uid=%d, gid=%d)",
- frame->root->unique, fd, uid, gid);
- }
-
- STACK_WIND (frame,
- trace_fchown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
-}
-
-int32_t
-trace_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (mode=%o, *fd=%p)",
- frame->root->unique, mode, fd);
- }
-
- STACK_WIND (frame,
- trace_fchmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
- return 0;
-}
-
-int32_t
-trace_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
- }
-
- STACK_WIND (frame,
- trace_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
-}
-
-int32_t
-trace_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, fd, cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- }
-
- STACK_WIND (frame,
- trace_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
- return 0;
-}
-
-int32_t
-trace_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, flags=%d, count=%d",
- frame->root->unique, fd, flags, count);
- }
-
- STACK_WIND (frame,
- trace_setdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
+trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
}
+int
+trace_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset, xdata);
+
+ return 0;
+}
+
+int
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fstat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+}
+
+int
+trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
+ return 0;
+}
int32_t
-trace_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
+trace_forget (xlator_t *this, inode_t *inode)
{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret (%d), op_errno(%d)",
- frame->root->unique, op_ret, op_errno);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ /* If user want to understand when a lookup happens,
+ he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+ LOG_ELEMENT (conf, string);
+ }
- return 0;
+out:
+ return 0;
}
int32_t
-trace_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": loc->path (%s) flag (%d)",
- frame->root->unique, loc->path, flag);
-
- STACK_WIND (frame,
- trace_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-int32_t
-trace_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret (%d), op_errno(%d)",
- frame->root->unique, op_ret, op_errno);
-
- STACK_UNWIND (frame, op_ret, op_errno, stats);
- return 0;
-}
-
-int32_t
-trace_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (flags=%d)",
- frame->root->unique, flags);
-
- STACK_WIND (frame,
- trace_stats_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
-
- return 0;
+trace_releasedir (xlator_t *this, fd_t *fd)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ return 0;
}
+int32_t
+trace_release (xlator_t *this, fd_t *fd)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled ||
+ trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ return 0;
+}
+
+
void
enable_all_calls (int enabled)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- trace_fop_names[i].enabled = enabled;
+ int i;
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ trace_fop_names[i].enabled = enabled;
}
-void
+void
enable_call (const char *name, int enabled)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- if (!strcasecmp(trace_fop_names[i].name, name))
- trace_fop_names[i].enabled = enabled;
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ if (!strcasecmp(trace_fop_names[i].name, name))
+ trace_fop_names[i].enabled = enabled;
}
-/*
- include = 1 for "include-ops"
- = 0 for "exclude-ops"
+/*
+ include = 1 for "include-ops"
+ = 0 for "exclude-ops"
*/
void
process_call_list (const char *list, int include)
{
- enable_all_calls (include ? 0 : 1);
-
- char *call = strsep ((char **)&list, ",");
- while (call) {
- enable_call (call, include);
- call = strsep ((char **)&list, ",");
- }
+ enable_all_calls (include ? 0 : 1);
+
+ char *call = strsep ((char **)&list, ",");
+
+ while (call) {
+ enable_call (call, include);
+ call = strsep ((char **)&list, ",");
+ }
+}
+
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option his off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
-int32_t
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
init (xlator_t *this)
{
- dict_t *options = this->options;
- char *includes = NULL, *excludes = NULL;
-
- if (!this)
- return -1;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "trace translator requires one subvolume");
- return -1;
- }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
-
- includes = data_to_str (dict_get (options, "include-ops"));
- excludes = data_to_str (dict_get (options, "exclude-ops"));
-
- {
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
- trace_fop_names[i].enabled = 1;
- }
- }
-
- if (includes && excludes) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
- return -1;
- }
- if (includes)
- process_call_list (includes, 1);
- if (excludes)
- process_call_list (excludes, 0);
-
- gf_log_set_loglevel (GF_LOG_NORMAL);
-
- /* Set this translator's inode table pointer to child node's pointer. */
- this->itable = FIRST_CHILD (this)->itable;
-
- return 0;
+ dict_t *options = NULL;
+ char *includes = NULL, *excludes = NULL;
+ char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
+
+ if (!this)
+ return -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "trace translator requires one subvolume");
+ return -1;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
+
+ options = this->options;
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ return -1;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
+ if (dict_get (options, "force-log-level")) {
+ forced_loglevel = data_to_str (dict_get (options,
+ "force-log-level"));
+ if (!forced_loglevel)
+ goto setloglevel;
+
+ if (strcmp (forced_loglevel, "INFO") == 0)
+ conf->trace_log_level = GF_LOG_INFO;
+ else if (strcmp (forced_loglevel, "TRACE") == 0)
+ conf->trace_log_level = GF_LOG_TRACE;
+ else if (strcmp (forced_loglevel, "ERROR") == 0)
+ conf->trace_log_level = GF_LOG_ERROR;
+ else if (strcmp (forced_loglevel, "DEBUG") == 0)
+ conf->trace_log_level = GF_LOG_DEBUG;
+ else if (strcmp (forced_loglevel, "WARNING") == 0)
+ conf->trace_log_level = GF_LOG_WARNING;
+ else if (strcmp (forced_loglevel, "CRITICAL") == 0)
+ conf->trace_log_level = GF_LOG_CRITICAL;
+ else if (strcmp (forced_loglevel, "NONE") == 0)
+ conf->trace_log_level = GF_LOG_NONE;
+ }
+
+setloglevel:
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
+
+ return ret;
}
void
fini (xlator_t *this)
{
- if (!this)
- return;
-
- gf_log (this->name, GF_LOG_NORMAL,
- "trace translator unloaded");
- return;
+ if (!this)
+ return;
+
+ if (this->history)
+ eh_destroy (this->history);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "trace translator unloaded");
+ return;
}
struct xlator_fops fops = {
- .stat = trace_stat,
- .readlink = trace_readlink,
- .mknod = trace_mknod,
- .mkdir = trace_mkdir,
- .unlink = trace_unlink,
- .rmdir = trace_rmdir,
- .symlink = trace_symlink,
- .rename = trace_rename,
- .link = trace_link,
- .chmod = trace_chmod,
- .chown = trace_chown,
- .truncate = trace_truncate,
- .utimens = trace_utimens,
- .open = trace_open,
- .readv = trace_readv,
- .writev = trace_writev,
- .statfs = trace_statfs,
- .flush = trace_flush,
- .fsync = trace_fsync,
- .setxattr = trace_setxattr,
- .getxattr = trace_getxattr,
- .removexattr = trace_removexattr,
- .opendir = trace_opendir,
- .readdir = trace_readdir,
- .fsyncdir = trace_fsyncdir,
- .access = trace_access,
- .ftruncate = trace_ftruncate,
- .fstat = trace_fstat,
- .create = trace_create,
- .fchown = trace_fchown,
- .fchmod = trace_fchmod,
- .lk = trace_lk,
- .inodelk = trace_inodelk,
- .finodelk = trace_finodelk,
- .entrylk = trace_entrylk,
- .lookup = trace_lookup,
- .setdents = trace_setdents,
- .getdents = trace_getdents,
- .checksum = trace_checksum,
- .xattrop = trace_xattrop,
- .fxattrop = trace_fxattrop,
-};
-
-struct xlator_mops mops = {
- .stats = trace_stats,
+ .stat = trace_stat,
+ .readlink = trace_readlink,
+ .mknod = trace_mknod,
+ .mkdir = trace_mkdir,
+ .unlink = trace_unlink,
+ .rmdir = trace_rmdir,
+ .symlink = trace_symlink,
+ .rename = trace_rename,
+ .link = trace_link,
+ .truncate = trace_truncate,
+ .open = trace_open,
+ .readv = trace_readv,
+ .writev = trace_writev,
+ .statfs = trace_statfs,
+ .flush = trace_flush,
+ .fsync = trace_fsync,
+ .setxattr = trace_setxattr,
+ .getxattr = trace_getxattr,
+ .fsetxattr = trace_fsetxattr,
+ .fgetxattr = trace_fgetxattr,
+ .removexattr = trace_removexattr,
+ .opendir = trace_opendir,
+ .readdir = trace_readdir,
+ .readdirp = trace_readdirp,
+ .fsyncdir = trace_fsyncdir,
+ .access = trace_access,
+ .ftruncate = trace_ftruncate,
+ .fstat = trace_fstat,
+ .create = trace_create,
+ .lk = trace_lk,
+ .inodelk = trace_inodelk,
+ .finodelk = trace_finodelk,
+ .entrylk = trace_entrylk,
+ .fentrylk = trace_fentrylk,
+ .lookup = trace_lookup,
+ .rchecksum = trace_rchecksum,
+ .xattrop = trace_xattrop,
+ .fxattrop = trace_fxattrop,
+ .setattr = trace_setattr,
+ .fsetattr = trace_fsetattr,
};
struct xlator_cbks cbks = {
+ .release = trace_release,
+ .releasedir = trace_releasedir,
+ .forget = trace_forget,
};
struct volume_options options[] = {
- { .key = {"include-ops", "include"},
- .type = GF_OPTION_TYPE_STR,
- /*.value = { ""} */
- },
- { .key = {"exclude-ops", "exclude"},
- .type = GF_OPTION_TYPE_STR
- /*.value = { ""} */
- },
- { .key = {NULL} },
+ { .key = {"include-ops", "include"},
+ .type = GF_OPTION_TYPE_STR,
+ /*.value = { ""} */
+ },
+ { .key = {"exclude-ops", "exclude"},
+ .type = GF_OPTION_TYPE_STR
+ /*.value = { ""} */
+ },
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {NULL} },
};
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 000000000..045eefb36
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
+
+#define trace_stat_to_str(buf, statstr) \
+ do { \
+ char atime_buf[256] = {0,}; \
+ char mtime_buf[256] = {0,}; \
+ char ctime_buf[256] = {0,}; \
+ uint64_t ia_time = 0; \
+ \
+ if (!buf) \
+ break; \
+ \
+ ia_time = buf->ia_atime; \
+ strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_mtime; \
+ strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_ctime; \
+ strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ snprintf (statstr, sizeof (statstr), \
+ "gfid=%s ino=%"PRIu64", mode=%o, " \
+ "nlink=%"GF_PRI_NLINK", uid=%u, " \
+ "gid=%u, size=%"PRIu64", " \
+ "blocks=%"PRIu64", atime=%s, " \
+ "mtime=%s, ctime=%s", \
+ uuid_utoa (buf->ia_gfid), buf->ia_ino, \
+ st_mode_from_ia (buf->ia_prot, \
+ buf->ia_type), \
+ buf->ia_nlink, buf->ia_uid, \
+ buf->ia_gid, buf->ia_size, \
+ buf->ia_blocks, atime_buf, \
+ mtime_buf, ctime_buf); \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680f..36efc6698 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/xlators/cluster/unify/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 000000000..faadd117f
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module -avoidversion -lssl -lcrypto
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif \ No newline at end of file
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 000000000..1ec41495c
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,962 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher A cipher algorithm, which requires some chunks of
+ * algorithm text to be padded at left and(or) right sides before
+ * cipher transaform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm Atomic cipher algorithm, which requires the last
+ * with EOF issue incomplete cblock in a file to be padded with some
+ * data (usually zeros).
+ *
+ *
+ * operation, which reading/writing from offset, which is not aligned to
+ * forms a gap at to atom size
+ * the beginning
+ *
+ *
+ * operation, which reading/writing count bytes starting from offset off,
+ * forms a gap at so that off+count is not aligned to atom_size
+ * the end
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Сomment. Head block has at least one gap (either at
+ * the beginning, or at the end)
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Сomment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+static off_t offset_at_head(struct avec_config *conf)
+{
+ return conf->aligned_offset;
+}
+
+static off_t offset_at_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_hole_conf(frame));
+}
+
+static off_t offset_at_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_data_conf(frame));
+}
+
+
+static off_t offset_at_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0) +
+ (conf->nr_full_blocks << get_atom_bits(object));
+}
+
+static off_t offset_at_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_hole_conf(frame), object);
+}
+
+
+static off_t offset_at_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_data_conf(frame), object);
+}
+
+static off_t offset_at_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0);
+}
+
+static off_t offset_at_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_data_conf(frame), object);
+}
+
+static off_t offset_at_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ uint32_t gap_at_beg;
+ uint32_t gap_at_end;
+
+ check_head_block(conf);
+
+ gap_at_beg = conf->off_in_head;
+
+ if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0 )
+ gap_at_end = 0;
+ else
+ gap_at_end = get_atom_size(object) - conf->off_in_tail;
+
+ return get_atom_size(object) - (gap_at_beg + gap_at_end);
+}
+
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_tail_block(conf);
+ return conf->off_in_tail;
+}
+
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_full_block(conf);
+ return get_atom_size(object);
+}
+
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_hole_conf(frame), object);
+}
+
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+ check_cursor_head(conf);
+
+ return conf->off_in_head;
+}
+
+static uint32_t offset_in_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return 0;
+}
+
+static uint32_t offset_in_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_cursor_full(conf);
+
+ if (has_head_block(conf))
+ return (conf->cursor - 1) << get_atom_bits(object);
+ else
+ return conf->cursor << get_atom_bits(object);
+}
+
+static uint32_t offset_in_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_data_conf(frame));
+}
+
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_hole_conf(frame));
+}
+
+static uint32_t offset_in_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_data_conf(frame), object);
+}
+
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Uptodate gaps of the @partial block with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ struct rmw_atom *atom)
+{
+ size_t was_read = 0;
+ uint64_t file_size;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *partial = atom->get_iovec(frame, 0);
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+ gf_boolean_t check_last_cblock = _gf_false;
+#endif
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto exit;
+
+ file_size = local->cur_file_size;
+ was_read = op_ret;
+
+ if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+ /*
+ * head atom with a non-uptodate gap
+ * at the beginning
+ *
+ * fill the gap with plain text of the
+ * latest version. Convert a part of hole
+ * (if any) to zeros.
+ */
+ int32_t i;
+ int32_t copied = 0;
+ int32_t to_gap; /* amount of data needed to uptodate
+ the gap at the beginning */
+#if 0
+ int32_t hole = 0; /* The part of the hole which
+ * got in the head block */
+#endif /* 0 */
+ to_gap = conf->off_in_head;
+
+ if (was_read < to_gap) {
+ if (file_size >
+ offset_at_head(conf) + was_read) {
+ /*
+ * It is impossible to uptodate
+ * head block: too few bytes have
+ * been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the beginning");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+#if 0
+ hole = to_gap - was_read;
+#endif /* 0 */
+ to_gap = was_read;
+ }
+ /*
+ * uptodate the gap at the beginning
+ */
+ for (i = 0; i < count && copied < to_gap; i++) {
+ int32_t to_copy;
+
+ to_copy = vec[i].iov_len;
+ if (to_copy > to_gap - copied)
+ to_copy = to_gap - copied;
+
+ memcpy(partial->iov_base, vec[i].iov_base, to_copy);
+ copied += to_copy;
+ }
+#if 0
+ /*
+ * If possible, convert part of the
+ * hole, which got in the head block
+ */
+ ret = TRY_LOCK(&local->hole_lock);
+ if (!ret) {
+ if (local->hole_handled)
+ /*
+ * already converted by
+ * crypt_writev_cbk()
+ */
+ UNLOCK(&local->hole_lock);
+ else {
+ /*
+ * convert the part of the hole
+ * which got in the head block
+ * to zeros.
+ *
+ * Update the orig_offset to make
+ * sure writev_cbk() won't care
+ * about this part of the hole.
+ *
+ */
+ memset(partial->iov_base + to_gap, 0, hole);
+
+ conf->orig_offset -= hole;
+ conf->orig_size += hole;
+ UNLOCK(&local->hole_lock);
+ }
+ }
+ else /*
+ * conversion is being performed
+ * by crypt_writev_cbk()
+ */
+ ;
+#endif /* 0 */
+ }
+ if (atom->locality == TAIL_ATOM ||
+ (!has_tail_block(conf) && conf->off_in_tail)) {
+ /*
+ * tail atom, or head atom with a non-uptodate
+ * gap at the end.
+ *
+ * fill the gap at the end of the block
+ * with plain text of the latest version.
+ * Pad the result, (if needed)
+ */
+ int32_t i;
+ int32_t to_gap;
+ int copied;
+ off_t off_in_tail;
+ int32_t to_copy;
+
+ off_in_tail = conf->off_in_tail;
+ to_gap = conf->gap_in_tail;
+
+ if (to_gap && was_read < off_in_tail + to_gap) {
+ /*
+ * It is impossible to uptodate
+ * the gap at the end: too few bytes
+ * have been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the end");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * uptodate the gap at the end
+ */
+ copied = 0;
+ to_copy = to_gap;
+ for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+ uint32_t from_vec, off_in_vec;
+
+ off_in_vec = 0;
+ from_vec = vec[i].iov_len;
+ if (from_vec > to_copy) {
+ off_in_vec = from_vec - to_copy;
+ from_vec = to_copy;
+ }
+ memcpy(partial->iov_base +
+ off_in_tail + to_gap - copied - from_vec,
+ vec[i].iov_base + off_in_vec,
+ from_vec);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+ (int)from_vec,
+ (int)off_in_tail + to_gap - copied - from_vec,
+ (int)off_in_vec);
+
+ copied += from_vec;
+ to_copy -= from_vec;
+ }
+ partial->iov_len = off_in_tail + to_gap;
+
+ if (object_alg_should_pad(object)) {
+ int32_t resid = 0;
+ resid = partial->iov_len & (object_alg_blksize(object) - 1);
+ if (resid) {
+ /*
+ * append a new EOF padding
+ */
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "set padding size %d",
+ local->eof_padding_size);
+
+ memset(partial->iov_base + partial->iov_len,
+ 1,
+ local->eof_padding_size);
+ partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "pad cblock with %d zeros:",
+ local->eof_padding_size);
+ dump_cblock(this,
+ (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ check_last_cblock = _gf_true;
+#endif
+ }
+ }
+ }
+ /*
+ * encrypt the whole block
+ */
+ encrypt_aligned_iov(object,
+ partial,
+ 1,
+ atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+ if (check_last_cblock == _gf_true) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "encrypt last cblock with offset %llu",
+ (unsigned long long)atom->offset_at(frame, object));
+ dump_cblock(this, (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ }
+#endif
+ set_local_io_params_writev(frame, object, atom,
+ atom->offset_at(frame, object),
+ iovec_get_size(partial, 1));
+ /*
+ * write the whole block to disk
+ */
+ end_writeback_partial_block = dispatch_end_writeback(local->fop);
+ conf->cursor ++;
+ STACK_WIND(frame,
+ end_writeback_partial_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ partial,
+ 1,
+ atom->offset_at(frame, object),
+ local->flags,
+ local->iobref_data,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "submit partial block: %d bytes from %d offset",
+ (int)iovec_get_size(partial, 1),
+ (int)atom->offset_at(frame, object));
+ exit:
+ return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ */
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype)
+{
+ int32_t ret;
+ dict_t *dict;
+ struct rmw_atom *atom;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ atom = atom_by_types(local->active_setup, ltype);
+ /*
+ * To perform the "read" component of the read-modify-write
+ * sequence the crypt translator does stack_wind to itself.
+ *
+ * Pass current file size to crypt_readv()
+ */
+ dict = dict_new();
+ if (!dict) {
+ /*
+ * FIXME: Handle the error
+ */
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ return;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ /*
+ * FIXME: Handle the error
+ */
+ dict_unref(dict);
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ atom->rmw,
+ this,
+ this->fops->readv, /* crypt_readv */
+ fd,
+ atom->count_to_uptodate(frame, object), /* count */
+ atom->offset_at(frame, object), /* offset to read from */
+ 0,
+ dict);
+ exit:
+ dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into accout head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ conf->cursor += blocks_to_write;
+
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+static int32_t rmw_data_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_data_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, TAIL_ATOM));
+}
+
+static int32_t rmw_hole_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_hole_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, TAIL_ATOM));
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ if (conf->acount == 1 && conf->off_in_tail)
+ return get_atom_size(object);
+ else
+ /* there is no need to read the whole head block */
+ return conf->off_in_head;
+}
+
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ /* we need to read the whole tail block */
+ return get_atom_size(object);
+}
+
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_hole_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_hole_conf(frame), object);
+}
+
+/* atom->get_config() */
+
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->data_conf;
+}
+
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec;
+}
+
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0);
+}
+
+static inline struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->blocks_in_pool - 1);
+}
+
+static inline struct iovec *get_iovec_data_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec;
+}
+
+static inline struct iovec *get_iovec_data_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0) + count;
+}
+
+static inline struct iovec *get_iovec_data_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec +
+ (conf->off_in_head ? 1 : 0) +
+ conf->nr_full_blocks;
+}
+
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+ [DATA_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_data_head,
+ .offset_at = offset_at_data_head,
+ .offset_in = offset_in_data_head,
+ .get_iovec = get_iovec_data_head,
+ .io_size_nopad = io_size_nopad_data_head,
+ .count_to_uptodate = count_to_uptodate_data_head,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_data_tail,
+ .offset_at = offset_at_data_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_data_tail,
+ .io_size_nopad = io_size_nopad_data_tail,
+ .count_to_uptodate = count_to_uptodate_data_tail,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_data_full,
+ .offset_in = offset_in_data_full,
+ .get_iovec = get_iovec_data_full,
+ .io_size_nopad = io_size_nopad_data_full,
+ .get_config = get_config_data
+ },
+ [HOLE_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_hole_head,
+ .offset_at = offset_at_hole_head,
+ .offset_in = offset_in_hole_head,
+ .get_iovec = get_iovec_hole_head,
+ .io_size_nopad = io_size_nopad_hole_head,
+ .count_to_uptodate = count_to_uptodate_hole_head,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_hole_tail,
+ .offset_at = offset_at_hole_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_hole_tail,
+ .io_size_nopad = io_size_nopad_hole_tail,
+ .count_to_uptodate = count_to_uptodate_hole_tail,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_hole_full,
+ .offset_in = offset_in_hole_full,
+ .get_iovec = get_iovec_hole_full,
+ .io_size_nopad = io_size_nopad_hole_full,
+ .get_config = get_config_hole
+ }
+};
+
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality)
+{
+ return &atoms[data][locality];
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 000000000..7c212ad5d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+#define noop do {; } while (0)
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+ return msgflags_check_mtd_rlock(flags) ||
+ msgflags_check_mtd_wlock(flags);
+}
+
+/*
+ * returns number of logical blocks occupied
+ * (maybe partially) by @count bytes
+ * at offset @start.
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+ int blkbits)
+{
+ return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
+}
+
+/*
+ * are two bytes (represented by offsets @off1
+ * and @off2 respectively) in the same logical
+ * block.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+ int blkbits)
+{
+ return off1 >> blkbits == off2 >> blkbits;
+}
+
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+ gf_log(this->name, GF_LOG_DEBUG,
+ "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+ (buf)[0],
+ (buf)[1],
+ (buf)[2],
+ (buf)[3],
+ (buf)[4],
+ (buf)[5],
+ (buf)[6],
+ (buf)[7],
+ (buf)[8],
+ (buf)[9],
+ (buf)[10],
+ (buf)[11],
+ (buf)[12],
+ (buf)[13],
+ (buf)[14],
+ (buf)[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 000000000..799727573
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 000000000..db2e6d83c
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4498 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop)
+{
+ crypt_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ return NULL;
+ }
+ local->fop = fop;
+ LOCK_INIT(&local->hole_lock);
+ LOCK_INIT(&local->call_lock);
+ LOCK_INIT(&local->rw_count_lock);
+
+ frame->local = local;
+ return local;
+}
+
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+ int ret;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not get inode info");
+ return NULL;
+ }
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not obtain inode info");
+ return NULL;
+ }
+ return info;
+}
+
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+ xlator_t *this)
+{
+ if (local->info)
+ return local->info;
+ local->info = get_crypt_inode_info(local->fd->inode, this);
+ return local->info;
+}
+
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+ loc_t *loc)
+{
+ struct crypt_inode_info *info;
+
+ info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_WARNING,
+ "Can not allocate inode info");
+ return NULL;
+ }
+ memset(info, 0, sizeof(*info));
+#if DEBUG_CRYPT
+ info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!info->loc) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+ GF_FREE(info);
+ return NULL;
+ }
+ if (loc_copy(info->loc, loc)){
+ GF_FREE(info->loc);
+ GF_FREE(info);
+ return NULL;
+ }
+#endif /* DEBUG_CRYPT */
+
+ local->info = info;
+ return info;
+}
+
+static void free_inode_info(struct crypt_inode_info *info)
+{
+#if DEBUG_CRYPT
+ loc_wipe(info->loc);
+ GF_FREE(info->loc);
+#endif
+ memset(info, 0, sizeof(*info));
+ GF_FREE(info);
+}
+
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ if (!inode_ctx_del (inode, this, &ctx_addr))
+ free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
+ return 0;
+}
+
+#if DEBUG_CRYPT
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+ struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = get_object_cinfo(local->info);
+ struct avec_config *conf = &local->data_conf;
+ uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+ if (read <= 0)
+ return;
+ if (read != iovec_get_size(vec, count))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "op_ret differs from amount of read bytes");
+
+ if (object_alg_should_pad(object) && (read & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (!= 0 mod(cblock size))");
+
+ if (conf->aligned_offset + read >
+ stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (too large))");
+
+}
+
+#define PT_BYTES_TO_DUMP (32)
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+ int32_t to_dump;
+ char str[PT_BYTES_TO_DUMP + 1];
+
+ if (!avec)
+ return;
+ to_dump = avec->iov_len;
+ if (to_dump > PT_BYTES_TO_DUMP)
+ to_dump = PT_BYTES_TO_DUMP;
+ memcpy(str, avec->iov_base, to_dump);
+ memset(str + to_dump, '0', 1);
+ gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+ return conf->acount ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ conf->nr_full_blocks;
+}
+
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+ return conf->blocks_in_pool ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ !!has_full_blocks(conf);
+}
+
+static void crypt_check_conf(struct avec_config *conf)
+{
+ int32_t ret = 0;
+ const char *msg;
+
+ switch (conf->type) {
+ case DATA_ATOM:
+ msg = "data";
+ ret = data_conf_invariant(conf);
+ break;
+ case HOLE_ATOM:
+ msg = "hole";
+ ret = hole_conf_invariant(conf);
+ break;
+ default:
+ msg = "unknown";
+ }
+ if (!ret)
+ gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
+}
+
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ uint64_t local_file_size;
+
+ switch(local->fop) {
+ case GF_FOP_FTRUNCATE:
+ return;
+ case GF_FOP_WRITE:
+ local_file_size = local->new_file_size;
+ break;
+ case GF_FOP_READ:
+ if (parent_is_crypt_xlator(frame, this))
+ return;
+ local_file_size = local->cur_file_size;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+ return;
+ }
+ if (buf->ia_size != round_up(local_file_size,
+ object_alg_blksize(object)))
+ gf_log("crypt", GF_LOG_DEBUG,
+ "bad ia_size in buf (%llu), should be %llu",
+ (unsigned long long)buf->ia_size,
+ (unsigned long long)round_up(local_file_size,
+ object_alg_blksize(object)));
+}
+
+#else
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Compound a temporal vector @avec with block-aligned
+ * components, decrypt and fix it up to represent a chunk
+ * of data corresponding to the original size and offset.
+ * Pass the result to the next translator.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *avec;
+ uint32_t i;
+ uint32_t to_vec;
+ uint32_t to_user;
+
+ check_buf(frame, this, stbuf);
+ check_read(frame, this, op_ret, vec, count, stbuf);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->iobref = iobref_ref(iobref);
+
+ local->buf = *stbuf;
+ local->buf.ia_size = local->cur_file_size;
+
+ if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+ goto put_one_call;
+
+ if (conf->orig_offset >= local->cur_file_size) {
+ local->op_ret = 0;
+ goto put_one_call;
+ }
+ /*
+ * correct config params with real file size
+ * and actual amount of bytes read
+ */
+ set_config_offsets(frame, this,
+ conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+ if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+ conf->orig_size = local->cur_file_size - conf->orig_offset;
+ /*
+ * calculate amount of data to be returned
+ * to user.
+ */
+ to_user = op_ret;
+ if (conf->aligned_offset + to_user <= conf->orig_offset) {
+ gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ to_user -= (conf->aligned_offset - conf->orig_offset);
+
+ if (to_user > conf->orig_size)
+ to_user = conf->orig_size;
+ local->rw_count = to_user;
+
+ op_errno = set_config_avec_data(this, local,
+ conf, object, vec, count);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto put_one_call;
+ }
+ avec = conf->avec;
+#if DEBUG_CRYPT
+ if (conf->off_in_tail != 0 &&
+ conf->off_in_tail < object_alg_blksize(object) &&
+ object_alg_should_pad(object))
+ gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+ conf->off_in_tail);
+ if (iovec_get_size(vec, count) != 0 &&
+ in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+ local->cur_file_size - 1,
+ object_alg_blkbits(object))) {
+ gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+ dump_cblock(this,
+ (unsigned char *)(avec[conf->acount - 1].iov_base) +
+ avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+ dump_cblock(this,
+ (unsigned char *)(vec[count - 1].iov_base) +
+ vec[count - 1].iov_len - object_alg_blksize(object));
+ }
+#endif
+ decrypt_aligned_iov(object, avec,
+ conf->acount, conf->aligned_offset);
+ /*
+ * pass proper plain data to user
+ */
+ avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+ avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+ to_vec = to_user;
+ for (i = 0; i < conf->acount; i++) {
+ if (avec[i].iov_len > to_vec)
+ avec[i].iov_len = to_vec;
+ to_vec -= avec[i].iov_len;
+ }
+ put_one_call:
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t crypt_readv(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+ (int)size, (long long)offset);
+ if (parent_is_crypt_xlator(frame, this))
+ gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_READ);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ if (size == 0)
+ goto trivial;
+
+ local->fd = fd_ref(fd);
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ set_config_offsets(frame, this, offset, size,
+ DATA_ATOM, 0);
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * We are called by crypt_writev (or cypt_ftruncate)
+ * to perform the "read" component of the read-modify-write
+ * (or read-prune-write) sequence for some atom;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ *
+ * Retrieve current file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ flags,
+ NULL);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_readv_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ readv_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ ret,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom,
+ off_t io_offset,
+ uint32_t io_size)
+{
+ crypt_local_t *local = frame->local;
+
+ local->io_offset = io_offset;
+ local->io_size = io_size;
+
+ local->io_offset_nopad =
+ atom->offset_at(frame, object) + atom->offset_in(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad offset to %llu",
+ (unsigned long long)local->io_offset_nopad);
+
+ local->io_size_nopad = atom->io_size_nopad(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad size to %llu",
+ (unsigned long long)local->io_size_nopad);
+
+ local->update_disk_file_size = 0;
+ /*
+ * NOTE: eof_padding_size is 0 for all full atoms;
+ * For head and tail atoms it will be set up at rmw_partial block()
+ */
+ local->new_file_size = local->cur_file_size;
+
+ if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
+
+ local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set new file size to %llu",
+ (unsigned long long)local->new_file_size);
+
+ local->update_disk_file_size = 1;
+ }
+}
+
+void set_local_io_params_ftruncate(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ uint32_t resid;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+ if (resid) {
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+ local->new_file_size = conf->aligned_offset;
+ local->update_disk_file_size = 0;
+ /*
+ * file size will be updated
+ * in the ->writev() stack,
+ * when submitting file tail
+ */
+ }
+ else {
+ local->eof_padding_size = 0;
+ local->new_file_size = conf->orig_offset;
+ local->update_disk_file_size = 1;
+ /*
+ * file size will be updated
+ * in this ->ftruncate stack
+ */
+ }
+}
+
+static inline void submit_head(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, HEAD_ATOM);
+}
+
+static inline void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, TAIL_ATOM);
+}
+
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+ /*
+ * hole conversion always means
+ * appended write and goes in ordered fashion
+ */
+ do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+ if (is_ordered_mode(frame)) {
+ do_ordered_submit(frame, this, DATA_ATOM);
+ return;
+ }
+ gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+ get_nr_calls(frame, nr_calls_data(frame));
+ do_parallel_submit(frame, this, DATA_ATOM);
+ return;
+}
+
+/*
+ * heplers called by writev_cbk, fruncate_cbk in ordered mode
+ */
+
+static inline int32_t should_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ return conf->avec != NULL;
+}
+
+static inline int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
+ /*
+ * Don't submit a part of hole, which
+ * fits into a data block:
+ * this part of hole will be converted
+ * as a gap filled by zeros in data head
+ * block.
+ */
+ return conf->cursor < conf->acount - 1;
+ else
+ return conf->cursor < conf->acount;
+}
+
+static inline int32_t should_resume_submit_data(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ if (is_ordered_mode(frame))
+ return conf->cursor < conf->acount;
+ /*
+ * parallel writes
+ */
+ return 0;
+}
+
+static inline int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+ return local->data_conf.avec != NULL;
+}
+
+static void update_local_file_params(call_frame_t *frame,
+ xlator_t *this,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ crypt_local_t *local = frame->local;
+
+ check_buf(frame, this, postbuf);
+
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->new_file_size;
+
+ local->cur_file_size = local->new_file_size;
+}
+
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Convert hole of size @size at offset @off to
+ * zeros and prepare respective iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Pre-conditions:
+ * @local->file_size is set and valid.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t off, off_t size)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+ ret = set_config_avec_hole(this, local,
+ &local->hole_conf, object, local->fop);
+ crypt_check_conf(&local->hole_conf);
+
+ return ret;
+}
+
+/*
+ * prepare for submit @count bytes at offset @from
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+ off_t from, int32_t size, struct iovec *vec,
+ int32_t vec_count, int32_t setup_gap)
+{
+ uint32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, from, size,
+ DATA_ATOM, setup_gap);
+
+ ret = set_config_avec_data(this, local,
+ &local->data_conf, object, vec, vec_count);
+ crypt_check_conf(&local->data_conf);
+
+ return ret;
+}
+
+static void free_avec(struct iovec *avec,
+ char **pool, int blocks_in_pool)
+{
+ if (!avec)
+ return;
+ GF_FREE(pool);
+ GF_FREE(avec);
+}
+
+static void free_avec_data(crypt_local_t *local)
+{
+ return free_avec(local->data_conf.avec,
+ local->data_conf.pool,
+ local->data_conf.blocks_in_pool);
+}
+
+static void free_avec_hole(crypt_local_t *local)
+{
+ return free_avec(local->hole_conf.avec,
+ local->hole_conf.pool,
+ local->hole_conf.blocks_in_pool);
+}
+
+
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (has_head_block(conf))
+ submit_head(frame, this);
+
+ if (has_full_blocks(conf))
+ submit_full(frame, this);
+
+ if (has_tail_block(conf))
+ submit_tail(frame, this);
+ return;
+}
+
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (should_submit_head_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_head(frame, this);
+ }
+ else if (should_submit_full_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_full(frame, this);
+ }
+ else if (should_submit_tail_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_tail(frame, this);
+ }
+ else
+ gf_log("crypt", GF_LOG_DEBUG,
+ "nothing has been submitted in ordered mode");
+ return;
+}
+
+static int32_t do_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+ if (local->cur_file_size < local->data_conf.orig_offset) {
+ /*
+ * Set up hole config
+ */
+ op_errno = prepare_for_submit_hole(frame,
+ this,
+ local->cur_file_size,
+ local->data_conf.orig_offset - local->cur_file_size);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto error;
+ }
+ }
+ if (should_submit_hole(local))
+ submit_hole(frame, this);
+ else
+ submit_data(frame, this);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t writev_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+int crypt_writev(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vec,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+ gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+ (int)iovec_get_size(vec, count), (long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ if (iobref)
+ local->iobref = iobref_ref(iobref);
+ /*
+ * to update real file size on the server
+ */
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (iovec_get_size(vec, count) == 0)
+ goto trivial;
+
+ ret = prepare_for_submit_data(frame, this, offset,
+ iovec_get_size(vec, count),
+ vec, count, 0 /* don't setup gup
+ in tail: we don't
+ know file size yet */);
+ if (ret)
+ goto error;
+
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * we are called by shinking crypt_ftruncate(),
+ * which doesn't perform hole conversion;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ */
+
+ /*
+ * extract file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ submit_data(frame, this);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ /*
+ * lock the file and retrieve its size
+ */
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_writev_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ writev_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->iobref)
+ iobref_unref(iobref);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ set_config_offsets(frame, this,
+ offset,
+ 0, /* count */
+ DATA_ATOM,
+ 0 /* since we prune, there is no
+ gap in tail to uptodate */);
+ return 0;
+}
+
+/*
+ * Finish the read-prune-modify sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * submuit the rest of the file
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ dict_t *dict;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+ local->new_file_size = conf->orig_offset;
+
+ /*
+ * The rest of the file is a partial block and, hence,
+ * should be written via RMW sequence, so the crypt xlator
+ * does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_writev()
+ */
+ op_errno = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (op_errno) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ dict_unref(dict);
+ goto error;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "passing current file size (%llu) to crypt_writev",
+ (unsigned long long)local->cur_file_size);
+ /*
+ * Padding will be filled with
+ * zeros by rmw_partial_block()
+ */
+ STACK_WIND(frame,
+ prune_complete,
+ this,
+ this->fops->writev, /* crypt_writev */
+ local->fd,
+ &local->vec,
+ 1,
+ conf->aligned_offset, /* offset to write from */
+ 0,
+ local->iobref,
+ dict);
+
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked in behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we should't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ */
+static int32_t prune_write(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t i;
+ size_t to_copy;
+ size_t copied = 0;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret == -1)
+ goto put_one_call;
+
+ /*
+ * At first, uptodate head block
+ */
+ if (iovec_get_size(vec, count) < conf->off_in_head) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to uptodate head block for prune");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ local->vec.iov_len = conf->off_in_head;
+ local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+ gf_crypt_mt_data);
+
+ if (local->vec.iov_base == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ for (i = 0; i < count; i++) {
+ to_copy = vec[i].iov_len;
+ if (to_copy > local->vec.iov_len - copied)
+ to_copy = local->vec.iov_len - copied;
+
+ memcpy((char *)local->vec.iov_base + copied,
+ vec[i].iov_base,
+ to_copy);
+ copied += to_copy;
+ if (copied == local->vec.iov_len)
+ break;
+ }
+ /*
+ * perform prune with aligned offset
+ * (i.e. at this step we prune a bit
+ * more then it is needed
+ */
+ STACK_WIND(frame,
+ prune_submit_file_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->aligned_offset,
+ local->xdata);
+ return 0;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * File prune is more complicated than expand.
+ * First we need to read the latest atom to not lose info
+ * needed for proper update. Also we need to make sure that
+ * every component of read-prune-write sequence leaves data
+ * consistent
+ *
+ * Non-cblock aligned prune is performed as read-prune-write
+ * sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune
+ * 3) issue a write request for the end-of-file
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ int32_t ret;
+
+ ret = prepare_for_prune(frame, this, offset);
+ if (ret)
+ return ret;
+ return read_prune_write(frame, this);
+}
+
+int32_t expand_file(call_frame_t *frame, xlator_t *this,
+ uint64_t offset)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+
+ ret = prepare_for_submit_hole(frame, this,
+ local->old_file_size,
+ offset - local->old_file_size);
+ if (ret)
+ return ret;
+ submit_hole(frame, this);
+ return 0;
+}
+
+static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_ftruncate,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . recieve file size;
+ * . expand or prune file.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+/* ->flush_cbk() */
+int32_t truncate_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local->xdata);
+ return 0;
+}
+
+/* ftruncate_cbk() */
+int32_t truncate_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *fd = local->fd;
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ STACK_WIND(frame,
+ truncate_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd,
+ NULL);
+ fd_unref(fd);
+ return 0;
+}
+
+/*
+ * is called as ->open_cbk()
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0) {
+ fd_unref(fd);
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno, NULL, NULL, NULL);
+ return 0;
+ }
+ /*
+ * crypt_truncate() is implemented via crypt_ftruncate(),
+ * so the crypt xlator does STACK_WIND to itself here
+ */
+ STACK_WIND(frame,
+ truncate_flush,
+ this,
+ this->fops->ftruncate, /* crypt_ftruncate */
+ fd,
+ local->offset,
+ NULL);
+ return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->offset = offset;
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ return end_writeback_writev;
+ case GF_FOP_FTRUNCATE:
+ return end_writeback_ftruncate;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+ return NULL;
+ }
+}
+
+/*
+ * true, if the caller needs metadata string
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+ data_t *data;
+ uint32_t flags;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, MSGFLAGS_PREFIX);
+ if (!data)
+ return 0;
+ if (data->len != sizeof(uint32_t)) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Bad msgflags size (%d)", data->len);
+ return -1;
+ }
+ flags = *((uint32_t *)data->data);
+ return msgflags_check_mtd_lock(&flags);
+}
+
+static int32_t crypt_open_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret < 0)
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ put_one_call_open(frame);
+ return 0;
+}
+
+static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_open_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+}
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ load_mtd_open,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ CRYPTO_FORMAT_PREFIX,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * verify metadata against the specified pathname
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc) {
+ ret = ENOMEM;
+ goto error;
+ }
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ ret = is_custom_mtd(xdata);
+ if (ret < 0) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ ret = EINVAL;
+ goto error;
+ }
+ local->custom_mtd = ret;
+
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ /*
+ * we can't open O_WRONLY, because
+ * we need to do read-modify-write
+ */
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ get_one_call_nolock(frame);
+ STACK_WIND(frame,
+ crypt_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc,
+ flags,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(open,
+ frame,
+ -1,
+ ret,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct object_cipher_info *object = &info->cinfo;
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+ uuid_utoa(info->oid));
+#endif
+ ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+ master);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Init inode info at ->create() time
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ data_t *data)
+{
+ struct object_cipher_info *object;
+
+ info->nr_minor = CRYPT_XLATOR_ID;
+ memcpy(info->oid, data->data, data->len);
+
+ object = &info->cinfo;
+
+ object->o_alg = master->m_alg;
+ object->o_mode = master->m_mode;
+ object->o_block_bits = master->m_block_bits;
+ object->o_dkey_size = master->m_dkey_size;
+}
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+{
+ memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
+}
+
+static int32_t crypt_create_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_private_t *priv = this->private;
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0) {
+ free_inode_info(info);
+ goto unwind;
+ }
+ op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (op_errno) {
+ op_ret = -1;
+ free_inode_info(info);
+ goto unwind;
+ }
+ /*
+ * FIXME: drop major subversion number
+ */
+ op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
+ if (op_ret == -1) {
+ op_errno = EIO;
+ free_inode_info(info);
+ goto unwind;
+ }
+ unwind:
+ free_format(local);
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+
+ STACK_WIND(frame,
+ crypt_create_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_create(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ mode_t mode,
+ mode_t umask,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int ret;
+ data_t *data;
+ crypt_local_t *local;
+ crypt_private_t *priv;
+ struct master_cipher_info *master;
+ struct crypt_inode_info *info;
+
+ priv = this->private;
+ master = get_master_cinfo(priv);
+
+ if (master_alg_atomic(master)) {
+ /*
+ * We can't open O_WRONLY, because we
+ * need to do read-modify-write.
+ */
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ }
+ local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ data = dict_get(xdata, "gfid-req");
+ if (!data) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+ goto error;
+ }
+ if (data->len != sizeof(uuid_t)) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad gfid size (%d), should be %d",
+ (int)data->len, (int)sizeof(uuid_t));
+ goto error;
+ }
+ info = alloc_inode_info(local, loc);
+ if (!info){
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * NOTE:
+ * format has to be created BEFORE
+ * proceeding to the untrusted server
+ */
+ ret = alloc_format_create(local);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ init_inode_info_create(info, master, data);
+
+ ret = create_format(local->format,
+ loc,
+ info,
+ master);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_format_size());
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame,
+ crypt_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc,
+ flags,
+ mode,
+ umask,
+ fd,
+ xdata);
+ return 0;
+ error:
+ gf_log("crypt", GF_LOG_WARNING, "can not create file");
+ STACK_UNWIND_STRICT(create,
+ frame,
+ -1,
+ ret,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * FIXME: this should depends on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+ char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
+static int32_t crypt_fsetxattr(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t linkop_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0 &&
+ op_errno == ENOENT &&
+ local->loc->inode->ia_type == IA_IFLNK) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * unpin inode on the server
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+void link_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+}
+
+void link_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ link_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * unlink()
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+void unlink_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+}
+
+void unlink_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ unlink_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ local->loc,
+ local->flags,
+ local->xdata);
+}
+
+void rename_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t rename_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ rename_unwind(frame);
+ return 0;
+}
+
+static int32_t rename_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ local->buf = *buf;
+ local->prebuf = *preoldparent;
+ local->postbuf = *postoldparent;
+ if (prenewparent) {
+ local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+ gf_crypt_mt_iatt);
+ if (!local->prenewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->prenewparent = *prenewparent;
+ }
+ if (postnewparent) {
+ local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+ gf_crypt_mt_iatt);
+ if (!local->postnewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->postnewparent = *postnewparent;
+ }
+ STACK_WIND(frame,
+ rename_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ rename_unwind(frame);
+ return 0;
+}
+
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+static int32_t __do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_wind_handler_t wind_fn;
+ linkop_unwind_handler_t unwind_fn;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret >= 0)
+ wind_fn(frame, this);
+ else {
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ unwind_fn(frame);
+ }
+ return 0;
+}
+
+static int32_t do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if(op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ gf_boolean_t upload_info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+ struct crypt_inode_info *info;
+ data_t *old_mtd;
+ uint32_t new_mtd_size;
+ uint64_t value = 0;
+ void (*unwind_fn)(call_frame_t *frame);
+ void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+ mtd_op_t mop;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ mop = linkop_mtdop_dispatch(local->fop);
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto wind;
+ if (op_ret < 0)
+ /*
+ * verification failed
+ */
+ goto error;
+
+ old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+ if (!old_mtd) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Metadata string wasn't found");
+ goto error;
+ }
+ new_mtd_size = format_size(mop, old_mtd->len);
+ op_errno = alloc_format(local, new_mtd_size);
+ if (op_errno)
+ goto error;
+ /*
+ * check for cached info
+ */
+ op_ret = inode_ctx_get(fd->inode, this, &value);
+ if (op_ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode info was not found");
+ op_errno = EINVAL;
+ goto error;
+ }
+ /*
+ * info was found in the cache
+ */
+ local->info = info;
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info wasn't found in the cache;
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info)
+ goto error;
+ init_inode_info_head(info, fd);
+ local->info = info;
+ upload_info = _gf_true;
+ }
+ op_errno = open_format((unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (op_errno)
+ goto error;
+ if (upload_info == _gf_true) {
+ op_errno = init_inode_info_tail(info,
+ get_master_cinfo(priv));
+ if (op_errno)
+ goto error;
+ op_errno = inode_ctx_put(fd->inode, this,
+ (uint64_t)(long)(info));
+ if (op_errno == -1) {
+ op_errno = EIO;
+ goto error;
+ }
+ }
+ /*
+ * update the format string (append/update/cup a MAC)
+ */
+ op_errno = update_format(local->format,
+ (unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->mac_idx,
+ mop,
+ local->newloc,
+ info,
+ get_master_cinfo(priv),
+ local);
+ if (op_errno)
+ goto error;
+ /*
+ * store the new format string on the server
+ */
+ if (new_mtd_size) {
+ op_errno = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_mtd_size);
+ if (op_errno)
+ goto error;
+ }
+ STACK_WIND(frame,
+ do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ local->loc,
+ local->xattr,
+ 0,
+ NULL);
+ return 0;
+ wind:
+ wind_fn(frame, this);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t linkop_grab_local(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags, dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret = ENOMEM;
+ fd_t *fd;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, op);
+ if (!local)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ fd = fd_create(oldloc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->flags = flags;
+ local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, oldloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ local->loc = NULL;
+ goto error;
+ }
+ if (newloc) {
+ local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+ if (!local->newloc) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ goto error;
+ }
+ memset(local->newloc, 0, sizeof(*local->newloc));
+ ret = loc_copy(local->newloc, newloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ GF_FREE(local->newloc);
+ goto error;
+ }
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ return 0;
+ error:
+ if (local->xdata)
+ dict_unref(local->xdata);
+ if (local->fd)
+ fd_unref(local->fd);
+ local->fd = 0;
+ local->loc = NULL;
+ local->newloc = NULL;
+
+ local->op_ret = -1;
+ local->op_errno = ret;
+
+ return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ */
+static int32_t linkop(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags,
+ dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret;
+ dict_t *dict;
+ crypt_local_t *local;
+ void (*unwind_fn)(call_frame_t *frame);
+
+ unwind_fn = linkop_unwind_dispatch(op);
+
+ ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+ local = frame->local;
+ if (ret)
+ goto error;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * Set a message to crypt_open() that we need
+ * locked metadata string.
+ * All link operations (link, unlink, rename)
+ * need write lock
+ */
+ msgflags_set_mtd_wlock(&local->msgflags);
+ ret = dict_set_static_bin(dict,
+ MSGFLAGS_PREFIX,
+ &local->msgflags,
+ sizeof(local->msgflags));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+ dict_unref(dict);
+ goto error;
+ }
+ /*
+ * verify metadata against the old pathname
+ * and retrieve locked metadata string
+ */
+ STACK_WIND(frame,
+ linkop_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ oldloc,
+ O_RDWR,
+ local->fd,
+ dict);
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = ret;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+}
+
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+}
+
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+}
+
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_readv_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+}
+
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local))
+ crypt_readv_done(frame, this);
+}
+
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate amout of butes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ */
+static int32_t load_file_size(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0)
+ goto unwind;
+ /*
+ * load regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+ local->buf.ia_size = data_to_uint64(data);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "FOP %d: Translate regular file to %llu",
+ local->fop,
+ (unsigned long long)local->buf.ia_size);
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? local->inode : NULL,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata,
+ op_ret >= 0 ? &local->postbuf : NULL);
+ break;
+ case GF_FOP_READ:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ 0,
+ op_ret >= 0 ? &local->buf : NULL,
+ NULL,
+ NULL);
+ break;
+ default:
+ gf_log(this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (local_inode)
+ inode_unref(local_inode);
+ return 0;
+}
+
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_stat(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t crypt_lookup(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+ STACK_WIND(frame,
+ crypt_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * for every regular directory entry find its real file size
+ * and update stat's buf properly
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ data_t *data;
+
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+ data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
+ if (!data){
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size of direntry not found");
+ op_errno = EIO;
+ op_ret = -1;
+ break;
+ }
+ entry->d_stat.ia_size = data_to_uint64(data);
+ }
+ unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto error;
+ }
+ else
+ dict_ref(xdata);
+ /*
+ * make sure that we'll have real file sizes at ->readdirp_cbk()
+ */
+ ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(xdata);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd,
+ size,
+ offset,
+ xdata);
+ dict_unref(xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ uint64_t block_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("block-size", block_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("block-size", block_size, size, error);
+
+ switch (block_size) {
+ case 512:
+ master->m_block_bits = 9;
+ break;
+ case 1024:
+ master->m_block_bits = 10;
+ break;
+ case 2048:
+ master->m_block_bits = 11;
+ break;
+ case 4096:
+ master->m_block_bits = 12;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: unsupported block size %llu",
+ (unsigned long long)block_size);
+ goto error;
+ }
+ return 0;
+ error:
+ return -1;
+}
+
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_alg = AES_CIPHER_ALG;
+ return 0;
+}
+
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_mode = XTS_CIPHER_MODE;
+ return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, size, error);
+
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+static int is_hex(char *s) {
+ return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
+}
+
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+ int hex_size)
+{
+ int i;
+ int hex_byte = 0;
+
+ for (i = 0; i < (hex_size / 2); i++) {
+ if (!is_hex(src + i*2) || !is_hex(src + i*2 + 1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: not hex symbol in key");
+ return -1;
+ }
+ if (sscanf(src + i*2, "%2x", &hex_byte) != 1) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: can not parse hex key");
+ return -1;
+ }
+ dst[i] = hex_byte & 0xff;
+ }
+ return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ int32_t ret;
+ FILE *file = NULL;
+
+ int32_t key_size;
+ char *opt_key_file_pathname = NULL;
+
+ unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+ char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ /*
+ * extract master key passed via option
+ */
+ GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+ if (!opt_key_file_pathname) {
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+ return -1;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+ opt_key_file_pathname);
+
+ file = fopen(opt_key_file_pathname, "r");
+ if (file == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: can not open file with master key");
+ return -1;
+ }
+ /*
+ * extract hex key
+ */
+ key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+ if (key_size < sizeof(hex_buf)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: master key is too short");
+ goto bad_key;
+ }
+ ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
+ if (ret)
+ goto bad_key;
+ memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+ memset(hex_buf, 0, sizeof(hex_buf));
+ fclose(file);
+
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return 0;
+ bad_key:
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+ if (file)
+ fclose(file);
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ return get_nmtd_vol_key(get_master_cinfo(priv));
+}
+
+int32_t crypt_init_xlator(xlator_t *this)
+{
+ int32_t ret;
+ crypt_private_t *priv = this->private;
+
+ ret = master_set_alg(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_mode(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_block_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_data_key_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_master_vol_key(this, priv);
+ if (ret)
+ return ret;
+ return master_set_nmtd_vol_key(this, priv);
+}
+
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+ this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+ if (!this->private) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Can not allocate memory for private data");
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static void crypt_free_private(xlator_t *this)
+{
+ crypt_private_t *priv = this->private;
+ if (priv) {
+ memset(priv, 0, sizeof(*priv));
+ GF_FREE(priv);
+ }
+}
+
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ crypt_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("crypt", this, error);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+ GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+ priv = this->private;
+
+ ret = master_set_block_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure block size");
+ goto error;
+ }
+ ret = master_set_data_key_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure data key size");
+ goto error;
+ }
+ return 0;
+ error:
+ return ret;
+}
+
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+struct volume_options options[] = {
+ { .key = {"master-key"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "Pathname of regular file which contains master volume key"
+ },
+ { .key = {"data-key-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Data key size (bits)",
+ .min = 256,
+ .max = 512,
+ .default_value = "256",
+ },
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Atom size (bits)",
+ .min = 512,
+ .max = 4096,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 000000000..01a8542ab
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,899 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+struct crypt_key {
+ uint32_t len;
+ const char *label;
+};
+
+/*
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE
+}crypt_key_type;
+
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits;
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2];
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+* This info is not changed during file's life
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo;
+};
+
+/*
+ * this should locate in secure memory
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+static inline struct master_cipher_info *get_master_cinfo(crypt_private_t *priv)
+{
+ return &priv->master;
+}
+
+static inline struct object_cipher_info *get_object_cinfo(struct crypt_inode_info
+ *info)
+{
+ return &info->cinfo;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head;
+ uint32_t off_in_tail;
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there is no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size od EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or date)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* lenght of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/* This represents a (read)modify-write atom */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of partial block should be uptodated by
+ * reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks goes here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+ int i;
+ size_t size = 0;
+ for (i = 0; i < count; i++)
+ size += vec[i].iov_len;
+ return size;
+}
+
+static inline int32_t crypt_xlator_id(void)
+{
+ return CRYPT_XLATOR_ID;
+}
+
+static inline mtd_loader_id current_mtd_loader(void)
+{
+ return MTD_LOADER_V1;
+}
+
+static inline uint32_t master_key_size (void)
+{
+ return crypt_keys[MASTER_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t nmtd_vol_key_size (void)
+{
+ return crypt_keys[NMTD_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t alg_mode_blkbits(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].blkbits;
+}
+
+static inline uint32_t alg_mode_blksize(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return 1 << alg_mode_blkbits(alg, mode);
+}
+
+static inline gf_boolean_t alg_mode_atomic(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].atomic;
+}
+
+static inline gf_boolean_t alg_mode_should_pad(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].should_pad;
+}
+
+static inline uint32_t master_alg_blksize(struct master_cipher_info *mr)
+{
+ return alg_mode_blksize(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t master_alg_blkbits(struct master_cipher_info *mr)
+{
+ return alg_mode_blkbits(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_atomic(struct master_cipher_info *mr)
+{
+ return alg_mode_atomic(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_should_pad(struct master_cipher_info *mr)
+{
+ return alg_mode_should_pad(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t object_alg_blksize(struct object_cipher_info *ob)
+{
+ return alg_mode_blksize(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t object_alg_blkbits(struct object_cipher_info *ob)
+{
+ return alg_mode_blkbits(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_atomic(struct object_cipher_info *ob)
+{
+ return alg_mode_atomic(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_should_pad(struct object_cipher_info *ob)
+{
+ return alg_mode_should_pad(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t aes_raw_key_size(struct master_cipher_info *master)
+{
+ return master->m_dkey_size >> 3;
+}
+
+static inline struct avec_config *get_hole_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->hole_conf);
+}
+
+static inline struct avec_config *get_data_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->data_conf);
+}
+
+static inline int32_t get_atom_bits (struct object_cipher_info *object)
+{
+ return object->o_block_bits;
+}
+
+static inline int32_t get_atom_size (struct object_cipher_info *object)
+{
+ return 1 << get_atom_bits(object);
+}
+
+static inline int32_t has_head_block(struct avec_config *conf)
+{
+ return conf->off_in_head ||
+ (conf->acount == 1 && conf->off_in_tail);
+}
+
+static inline int32_t has_tail_bloc